StringTableBuilder.cpp revision 360784
1160814Ssimon//===- StringTableBuilder.cpp - String table building utility -------------===// 2160814Ssimon// 3160814Ssimon// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4160814Ssimon// See https://llvm.org/LICENSE.txt for license information. 5160814Ssimon// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6160814Ssimon// 7160814Ssimon//===----------------------------------------------------------------------===// 8160814Ssimon 9160814Ssimon#include "llvm/MC/StringTableBuilder.h" 10160814Ssimon#include "llvm/ADT/CachedHashString.h" 11160814Ssimon#include "llvm/ADT/SmallString.h" 12160814Ssimon#include "llvm/ADT/StringRef.h" 13160814Ssimon#include "llvm/BinaryFormat/COFF.h" 14160814Ssimon#include "llvm/Support/Endian.h" 15160814Ssimon#include "llvm/Support/MathExtras.h" 16160814Ssimon#include "llvm/Support/raw_ostream.h" 17160814Ssimon#include <cassert> 18160814Ssimon#include <cstddef> 19160814Ssimon#include <cstdint> 20160814Ssimon#include <cstring> 21160814Ssimon#include <utility> 22160814Ssimon#include <vector> 23160814Ssimon 24160814Ssimonusing namespace llvm; 25160814Ssimon 26160814SsimonStringTableBuilder::~StringTableBuilder() = default; 27160814Ssimon 28160814Ssimonvoid StringTableBuilder::initSize() { 29160814Ssimon // Account for leading bytes in table so that offsets returned from add are 30160814Ssimon // correct. 31160814Ssimon switch (K) { 32160814Ssimon case RAW: 33160814Ssimon case DWARF: 34160814Ssimon Size = 0; 35160814Ssimon break; 36160814Ssimon case MachO: 37160814Ssimon case ELF: 38160814Ssimon // Start the table with a NUL byte. 39160814Ssimon Size = 1; 40160814Ssimon break; 41160814Ssimon case XCOFF: 42160814Ssimon case WinCOFF: 43160814Ssimon // Make room to write the table size later. 44160814Ssimon Size = 4; 45160814Ssimon break; 46160814Ssimon } 47160814Ssimon} 48160814Ssimon 49160814SsimonStringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) 50160814Ssimon : K(K), Alignment(Alignment) { 51160814Ssimon initSize(); 52160814Ssimon} 53160814Ssimon 54160814Ssimonvoid StringTableBuilder::write(raw_ostream &OS) const { 55160814Ssimon assert(isFinalized()); 56160814Ssimon SmallString<0> Data; 57160814Ssimon Data.resize(getSize()); 58160814Ssimon write((uint8_t *)Data.data()); 59160814Ssimon OS << Data; 60160814Ssimon} 61160814Ssimon 62160814Ssimonusing StringPair = std::pair<CachedHashStringRef, size_t>; 63160814Ssimon 64160814Ssimonvoid StringTableBuilder::write(uint8_t *Buf) const { 65160814Ssimon assert(isFinalized()); 66160814Ssimon for (const StringPair &P : StringIndexMap) { 67160814Ssimon StringRef Data = P.first.val(); 68160814Ssimon if (!Data.empty()) 69160814Ssimon memcpy(Buf + P.second, Data.data(), Data.size()); 70160814Ssimon } 71160814Ssimon // The COFF formats store the size of the string table in the first 4 bytes. 72160814Ssimon // For Windows, the format is little-endian; for AIX, it is big-endian. 73160814Ssimon if (K == WinCOFF) 74160814Ssimon support::endian::write32le(Buf, Size); 75160814Ssimon else if (K == XCOFF) 76160814Ssimon support::endian::write32be(Buf, Size); 77160814Ssimon} 78160814Ssimon 79160814Ssimon// Returns the character at Pos from end of a string. 80160814Ssimonstatic int charTailAt(StringPair *P, size_t Pos) { 81160814Ssimon StringRef S = P->first.val(); 82160814Ssimon if (Pos >= S.size()) 83160814Ssimon return -1; 84160814Ssimon return (unsigned char)S[S.size() - Pos - 1]; 85160814Ssimon} 86160814Ssimon 87160814Ssimon// Three-way radix quicksort. This is much faster than std::sort with strcmp 88160814Ssimon// because it does not compare characters that we already know the same. 89160814Ssimonstatic void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { 90160814Ssimontailcall: 91160814Ssimon if (Vec.size() <= 1) 92160814Ssimon return; 93160814Ssimon 94160814Ssimon // Partition items so that items in [0, I) are greater than the pivot, 95160814Ssimon // [I, J) are the same as the pivot, and [J, Vec.size()) are less than 96160814Ssimon // the pivot. 97160814Ssimon int Pivot = charTailAt(Vec[0], Pos); 98238405Sjkim size_t I = 0; 99160814Ssimon size_t J = Vec.size(); 100160814Ssimon for (size_t K = 1; K < J;) { 101160814Ssimon int C = charTailAt(Vec[K], Pos); 102160814Ssimon if (C > Pivot) 103160814Ssimon std::swap(Vec[I++], Vec[K++]); 104160814Ssimon else if (C < Pivot) 105238405Sjkim std::swap(Vec[--J], Vec[K]); 106160814Ssimon else 107160814Ssimon K++; 108160814Ssimon } 109160814Ssimon 110160814Ssimon multikeySort(Vec.slice(0, I), Pos); 111160814Ssimon multikeySort(Vec.slice(J), Pos); 112160814Ssimon 113160814Ssimon // multikeySort(Vec.slice(I, J - I), Pos + 1), but with 114160814Ssimon // tail call optimization. 115160814Ssimon if (Pivot != -1) { 116160814Ssimon Vec = Vec.slice(I, J - I); 117160814Ssimon ++Pos; 118160814Ssimon goto tailcall; 119160814Ssimon } 120160814Ssimon} 121160814Ssimon 122160814Ssimonvoid StringTableBuilder::finalize() { 123160814Ssimon assert(K != DWARF); 124160814Ssimon finalizeStringTable(/*Optimize=*/true); 125160814Ssimon} 126160814Ssimon 127160814Ssimonvoid StringTableBuilder::finalizeInOrder() { 128160814Ssimon finalizeStringTable(/*Optimize=*/false); 129160814Ssimon} 130160814Ssimon 131160814Ssimonvoid StringTableBuilder::finalizeStringTable(bool Optimize) { 132160814Ssimon Finalized = true; 133160814Ssimon 134160814Ssimon if (Optimize) { 135160814Ssimon std::vector<StringPair *> Strings; 136160814Ssimon Strings.reserve(StringIndexMap.size()); 137160814Ssimon for (StringPair &P : StringIndexMap) 138160814Ssimon Strings.push_back(&P); 139160814Ssimon 140160814Ssimon multikeySort(Strings, 0); 141160814Ssimon initSize(); 142160814Ssimon 143160814Ssimon StringRef Previous; 144160814Ssimon for (StringPair *P : Strings) { 145160814Ssimon StringRef S = P->first.val(); 146160814Ssimon if (Previous.endswith(S)) { 147160814Ssimon size_t Pos = Size - S.size() - (K != RAW); 148160814Ssimon if (!(Pos & (Alignment - 1))) { 149160814Ssimon P->second = Pos; 150160814Ssimon continue; 151160814Ssimon } 152160814Ssimon } 153160814Ssimon 154160814Ssimon Size = alignTo(Size, Alignment); 155160814Ssimon P->second = Size; 156160814Ssimon 157160814Ssimon Size += S.size(); 158160814Ssimon if (K != RAW) 159160814Ssimon ++Size; 160160814Ssimon Previous = S; 161160814Ssimon } 162160814Ssimon } 163160814Ssimon 164160814Ssimon if (K == MachO) 165160814Ssimon Size = alignTo(Size, 4); // Pad to multiple of 4. 166160814Ssimon 167160814Ssimon // The first byte in an ELF string table must be null, according to the ELF 168160814Ssimon // specification. In 'initSize()' we reserved the first byte to hold null for 169160814Ssimon // this purpose and here we actually add the string to allow 'getOffset()' to 170160814Ssimon // be called on an empty string. 171160814Ssimon if (K == ELF) 172160814Ssimon StringIndexMap[CachedHashStringRef("")] = 0; 173160814Ssimon} 174160814Ssimon 175160814Ssimonvoid StringTableBuilder::clear() { 176160814Ssimon Finalized = false; 177160814Ssimon StringIndexMap.clear(); 178160814Ssimon} 179160814Ssimon 180160814Ssimonsize_t StringTableBuilder::getOffset(CachedHashStringRef S) const { 181160814Ssimon assert(isFinalized()); 182160814Ssimon auto I = StringIndexMap.find(S); 183160814Ssimon assert(I != StringIndexMap.end() && "String is not in table!"); 184160814Ssimon return I->second; 185160814Ssimon} 186160814Ssimon 187160814Ssimonsize_t StringTableBuilder::add(CachedHashStringRef S) { 188160814Ssimon if (K == WinCOFF) 189160814Ssimon assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); 190160814Ssimon 191160814Ssimon assert(!isFinalized()); 192160814Ssimon auto P = StringIndexMap.insert(std::make_pair(S, 0)); 193160814Ssimon if (P.second) { 194160814Ssimon size_t Start = alignTo(Size, Alignment); 195160814Ssimon P.first->second = Start; 196160814Ssimon Size = Start + S.size() + (K != RAW); 197160814Ssimon } 198160814Ssimon return P.first->second; 199160814Ssimon} 200160814Ssimon