1259698Sdim//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===// 2259698Sdim// 3259698Sdim// The LLVM Compiler Infrastructure 4259698Sdim// 5259698Sdim// This file is distributed under the University of Illinois Open Source 6259698Sdim// License. See LICENSE.TXT for details. 7259698Sdim// 8259698Sdim//===----------------------------------------------------------------------===// 9259698Sdim#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H 10259698Sdim#define LLVM_SUPPORT_UNICODECHARRANGES_H 11259698Sdim 12259698Sdim#include "llvm/ADT/ArrayRef.h" 13259698Sdim#include "llvm/ADT/SmallPtrSet.h" 14259698Sdim#include "llvm/Support/Compiler.h" 15259698Sdim#include "llvm/Support/Debug.h" 16259698Sdim#include "llvm/Support/Mutex.h" 17259698Sdim#include "llvm/Support/MutexGuard.h" 18259698Sdim#include "llvm/Support/raw_ostream.h" 19259698Sdim 20259698Sdim#include <algorithm> 21259698Sdim 22259698Sdimnamespace llvm { 23259698Sdimnamespace sys { 24259698Sdim 25259698Sdim/// \brief Represents a closed range of Unicode code points [Lower, Upper]. 26259698Sdimstruct UnicodeCharRange { 27259698Sdim uint32_t Lower; 28259698Sdim uint32_t Upper; 29259698Sdim}; 30259698Sdim 31259698Sdiminline bool operator<(uint32_t Value, UnicodeCharRange Range) { 32259698Sdim return Value < Range.Lower; 33259698Sdim} 34259698Sdiminline bool operator<(UnicodeCharRange Range, uint32_t Value) { 35259698Sdim return Range.Upper < Value; 36259698Sdim} 37259698Sdim 38259698Sdim/// \brief Holds a reference to an ordered array of UnicodeCharRange and allows 39259698Sdim/// to quickly check if a code point is contained in the set represented by this 40259698Sdim/// array. 41259698Sdimclass UnicodeCharSet { 42259698Sdimpublic: 43259698Sdim typedef llvm::ArrayRef<UnicodeCharRange> CharRanges; 44259698Sdim 45259698Sdim /// \brief Constructs a UnicodeCharSet instance from an array of 46259698Sdim /// UnicodeCharRanges. 47259698Sdim /// 48259698Sdim /// Array pointed by \p Ranges should have the lifetime at least as long as 49259698Sdim /// the UnicodeCharSet instance, and should not change. Array is validated by 50259698Sdim /// the constructor, so it makes sense to create as few UnicodeCharSet 51259698Sdim /// instances per each array of ranges, as possible. 52259698Sdim UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { 53259698Sdim assert(rangesAreValid()); 54259698Sdim } 55259698Sdim 56259698Sdim /// \brief Returns true if the character set contains the Unicode code point 57259698Sdim /// \p C. 58259698Sdim bool contains(uint32_t C) const { 59259698Sdim return std::binary_search(Ranges.begin(), Ranges.end(), C); 60259698Sdim } 61259698Sdim 62259698Sdimprivate: 63259698Sdim /// \brief Returns true if each of the ranges is a proper closed range 64259698Sdim /// [min, max], and if the ranges themselves are ordered and non-overlapping. 65259698Sdim bool rangesAreValid() const { 66259698Sdim uint32_t Prev = 0; 67259698Sdim for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 68259698Sdim I != E; ++I) { 69259698Sdim if (I != Ranges.begin() && Prev >= I->Lower) { 70259698Sdim DEBUG(llvm::dbgs() << "Upper bound 0x"); 71259698Sdim DEBUG(llvm::dbgs().write_hex(Prev)); 72259698Sdim DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x"); 73259698Sdim DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n"); 74259698Sdim return false; 75259698Sdim } 76259698Sdim if (I->Upper < I->Lower) { 77259698Sdim DEBUG(llvm::dbgs() << "Upper bound 0x"); 78259698Sdim DEBUG(llvm::dbgs().write_hex(I->Lower)); 79259698Sdim DEBUG(llvm::dbgs() << " should not be less than lower bound 0x"); 80259698Sdim DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n"); 81259698Sdim return false; 82259698Sdim } 83259698Sdim Prev = I->Upper; 84259698Sdim } 85259698Sdim 86259698Sdim return true; 87259698Sdim } 88259698Sdim 89259698Sdim const CharRanges Ranges; 90259698Sdim}; 91259698Sdim 92259698Sdim} // namespace sys 93259698Sdim} // namespace llvm 94259698Sdim 95259698Sdim 96259698Sdim#endif // LLVM_SUPPORT_UNICODECHARRANGES_H 97