1//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H 10#define LLVM_SUPPORT_UNICODECHARRANGES_H 11 12#include "llvm/ADT/ArrayRef.h" 13#include "llvm/ADT/SmallPtrSet.h" 14#include "llvm/Support/Compiler.h" 15#include "llvm/Support/Debug.h" 16#include "llvm/Support/Mutex.h" 17#include "llvm/Support/MutexGuard.h" 18#include "llvm/Support/raw_ostream.h" 19 20#include <algorithm> 21 22namespace llvm { 23namespace sys { 24 25/// \brief Represents a closed range of Unicode code points [Lower, Upper]. 26struct UnicodeCharRange { 27 uint32_t Lower; 28 uint32_t Upper; 29}; 30 31inline bool operator<(uint32_t Value, UnicodeCharRange Range) { 32 return Value < Range.Lower; 33} 34inline bool operator<(UnicodeCharRange Range, uint32_t Value) { 35 return Range.Upper < Value; 36} 37 38/// \brief Holds a reference to an ordered array of UnicodeCharRange and allows 39/// to quickly check if a code point is contained in the set represented by this 40/// array. 41class UnicodeCharSet { 42public: 43 typedef llvm::ArrayRef<UnicodeCharRange> CharRanges; 44 45 /// \brief Constructs a UnicodeCharSet instance from an array of 46 /// UnicodeCharRanges. 47 /// 48 /// Array pointed by \p Ranges should have the lifetime at least as long as 49 /// the UnicodeCharSet instance, and should not change. Array is validated by 50 /// the constructor, so it makes sense to create as few UnicodeCharSet 51 /// instances per each array of ranges, as possible. 52 UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { 53 assert(rangesAreValid()); 54 } 55 56 /// \brief Returns true if the character set contains the Unicode code point 57 /// \p C. 58 bool contains(uint32_t C) const { 59 return std::binary_search(Ranges.begin(), Ranges.end(), C); 60 } 61 62private: 63 /// \brief Returns true if each of the ranges is a proper closed range 64 /// [min, max], and if the ranges themselves are ordered and non-overlapping. 65 bool rangesAreValid() const { 66 uint32_t Prev = 0; 67 for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 68 I != E; ++I) { 69 if (I != Ranges.begin() && Prev >= I->Lower) { 70 DEBUG(llvm::dbgs() << "Upper bound 0x"); 71 DEBUG(llvm::dbgs().write_hex(Prev)); 72 DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x"); 73 DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n"); 74 return false; 75 } 76 if (I->Upper < I->Lower) { 77 DEBUG(llvm::dbgs() << "Upper bound 0x"); 78 DEBUG(llvm::dbgs().write_hex(I->Lower)); 79 DEBUG(llvm::dbgs() << " should not be less than lower bound 0x"); 80 DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n"); 81 return false; 82 } 83 Prev = I->Upper; 84 } 85 86 return true; 87 } 88 89 const CharRanges Ranges; 90}; 91 92} // namespace sys 93} // namespace llvm 94 95 96#endif // LLVM_SUPPORT_UNICODECHARRANGES_H 97