StringTableBuilder.cpp revision 360784
1160814Ssimon//===- StringTableBuilder.cpp - String table building utility -------------===//
2160814Ssimon//
3160814Ssimon// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4160814Ssimon// See https://llvm.org/LICENSE.txt for license information.
5160814Ssimon// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6160814Ssimon//
7160814Ssimon//===----------------------------------------------------------------------===//
8160814Ssimon
9160814Ssimon#include "llvm/MC/StringTableBuilder.h"
10160814Ssimon#include "llvm/ADT/CachedHashString.h"
11160814Ssimon#include "llvm/ADT/SmallString.h"
12160814Ssimon#include "llvm/ADT/StringRef.h"
13160814Ssimon#include "llvm/BinaryFormat/COFF.h"
14160814Ssimon#include "llvm/Support/Endian.h"
15160814Ssimon#include "llvm/Support/MathExtras.h"
16160814Ssimon#include "llvm/Support/raw_ostream.h"
17160814Ssimon#include <cassert>
18160814Ssimon#include <cstddef>
19160814Ssimon#include <cstdint>
20160814Ssimon#include <cstring>
21160814Ssimon#include <utility>
22160814Ssimon#include <vector>
23160814Ssimon
24160814Ssimonusing namespace llvm;
25160814Ssimon
26160814SsimonStringTableBuilder::~StringTableBuilder() = default;
27160814Ssimon
28160814Ssimonvoid StringTableBuilder::initSize() {
29160814Ssimon  // Account for leading bytes in table so that offsets returned from add are
30160814Ssimon  // correct.
31160814Ssimon  switch (K) {
32160814Ssimon  case RAW:
33160814Ssimon  case DWARF:
34160814Ssimon    Size = 0;
35160814Ssimon    break;
36160814Ssimon  case MachO:
37160814Ssimon  case ELF:
38160814Ssimon    // Start the table with a NUL byte.
39160814Ssimon    Size = 1;
40160814Ssimon    break;
41160814Ssimon  case XCOFF:
42160814Ssimon  case WinCOFF:
43160814Ssimon    // Make room to write the table size later.
44160814Ssimon    Size = 4;
45160814Ssimon    break;
46160814Ssimon  }
47160814Ssimon}
48160814Ssimon
49160814SsimonStringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
50160814Ssimon    : K(K), Alignment(Alignment) {
51160814Ssimon  initSize();
52160814Ssimon}
53160814Ssimon
54160814Ssimonvoid StringTableBuilder::write(raw_ostream &OS) const {
55160814Ssimon  assert(isFinalized());
56160814Ssimon  SmallString<0> Data;
57160814Ssimon  Data.resize(getSize());
58160814Ssimon  write((uint8_t *)Data.data());
59160814Ssimon  OS << Data;
60160814Ssimon}
61160814Ssimon
62160814Ssimonusing StringPair = std::pair<CachedHashStringRef, size_t>;
63160814Ssimon
64160814Ssimonvoid StringTableBuilder::write(uint8_t *Buf) const {
65160814Ssimon  assert(isFinalized());
66160814Ssimon  for (const StringPair &P : StringIndexMap) {
67160814Ssimon    StringRef Data = P.first.val();
68160814Ssimon    if (!Data.empty())
69160814Ssimon      memcpy(Buf + P.second, Data.data(), Data.size());
70160814Ssimon  }
71160814Ssimon  // The COFF formats store the size of the string table in the first 4 bytes.
72160814Ssimon  // For Windows, the format is little-endian; for AIX, it is big-endian.
73160814Ssimon  if (K == WinCOFF)
74160814Ssimon    support::endian::write32le(Buf, Size);
75160814Ssimon  else if (K == XCOFF)
76160814Ssimon    support::endian::write32be(Buf, Size);
77160814Ssimon}
78160814Ssimon
79160814Ssimon// Returns the character at Pos from end of a string.
80160814Ssimonstatic int charTailAt(StringPair *P, size_t Pos) {
81160814Ssimon  StringRef S = P->first.val();
82160814Ssimon  if (Pos >= S.size())
83160814Ssimon    return -1;
84160814Ssimon  return (unsigned char)S[S.size() - Pos - 1];
85160814Ssimon}
86160814Ssimon
87160814Ssimon// Three-way radix quicksort. This is much faster than std::sort with strcmp
88160814Ssimon// because it does not compare characters that we already know the same.
89160814Ssimonstatic void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
90160814Ssimontailcall:
91160814Ssimon  if (Vec.size() <= 1)
92160814Ssimon    return;
93160814Ssimon
94160814Ssimon  // Partition items so that items in [0, I) are greater than the pivot,
95160814Ssimon  // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
96160814Ssimon  // the pivot.
97160814Ssimon  int Pivot = charTailAt(Vec[0], Pos);
98238405Sjkim  size_t I = 0;
99160814Ssimon  size_t J = Vec.size();
100160814Ssimon  for (size_t K = 1; K < J;) {
101160814Ssimon    int C = charTailAt(Vec[K], Pos);
102160814Ssimon    if (C > Pivot)
103160814Ssimon      std::swap(Vec[I++], Vec[K++]);
104160814Ssimon    else if (C < Pivot)
105238405Sjkim      std::swap(Vec[--J], Vec[K]);
106160814Ssimon    else
107160814Ssimon      K++;
108160814Ssimon  }
109160814Ssimon
110160814Ssimon  multikeySort(Vec.slice(0, I), Pos);
111160814Ssimon  multikeySort(Vec.slice(J), Pos);
112160814Ssimon
113160814Ssimon  // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
114160814Ssimon  // tail call optimization.
115160814Ssimon  if (Pivot != -1) {
116160814Ssimon    Vec = Vec.slice(I, J - I);
117160814Ssimon    ++Pos;
118160814Ssimon    goto tailcall;
119160814Ssimon  }
120160814Ssimon}
121160814Ssimon
122160814Ssimonvoid StringTableBuilder::finalize() {
123160814Ssimon  assert(K != DWARF);
124160814Ssimon  finalizeStringTable(/*Optimize=*/true);
125160814Ssimon}
126160814Ssimon
127160814Ssimonvoid StringTableBuilder::finalizeInOrder() {
128160814Ssimon  finalizeStringTable(/*Optimize=*/false);
129160814Ssimon}
130160814Ssimon
131160814Ssimonvoid StringTableBuilder::finalizeStringTable(bool Optimize) {
132160814Ssimon  Finalized = true;
133160814Ssimon
134160814Ssimon  if (Optimize) {
135160814Ssimon    std::vector<StringPair *> Strings;
136160814Ssimon    Strings.reserve(StringIndexMap.size());
137160814Ssimon    for (StringPair &P : StringIndexMap)
138160814Ssimon      Strings.push_back(&P);
139160814Ssimon
140160814Ssimon    multikeySort(Strings, 0);
141160814Ssimon    initSize();
142160814Ssimon
143160814Ssimon    StringRef Previous;
144160814Ssimon    for (StringPair *P : Strings) {
145160814Ssimon      StringRef S = P->first.val();
146160814Ssimon      if (Previous.endswith(S)) {
147160814Ssimon        size_t Pos = Size - S.size() - (K != RAW);
148160814Ssimon        if (!(Pos & (Alignment - 1))) {
149160814Ssimon          P->second = Pos;
150160814Ssimon          continue;
151160814Ssimon        }
152160814Ssimon      }
153160814Ssimon
154160814Ssimon      Size = alignTo(Size, Alignment);
155160814Ssimon      P->second = Size;
156160814Ssimon
157160814Ssimon      Size += S.size();
158160814Ssimon      if (K != RAW)
159160814Ssimon        ++Size;
160160814Ssimon      Previous = S;
161160814Ssimon    }
162160814Ssimon  }
163160814Ssimon
164160814Ssimon  if (K == MachO)
165160814Ssimon    Size = alignTo(Size, 4); // Pad to multiple of 4.
166160814Ssimon
167160814Ssimon  // The first byte in an ELF string table must be null, according to the ELF
168160814Ssimon  // specification. In 'initSize()' we reserved the first byte to hold null for
169160814Ssimon  // this purpose and here we actually add the string to allow 'getOffset()' to
170160814Ssimon  // be called on an empty string.
171160814Ssimon  if (K == ELF)
172160814Ssimon    StringIndexMap[CachedHashStringRef("")] = 0;
173160814Ssimon}
174160814Ssimon
175160814Ssimonvoid StringTableBuilder::clear() {
176160814Ssimon  Finalized = false;
177160814Ssimon  StringIndexMap.clear();
178160814Ssimon}
179160814Ssimon
180160814Ssimonsize_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
181160814Ssimon  assert(isFinalized());
182160814Ssimon  auto I = StringIndexMap.find(S);
183160814Ssimon  assert(I != StringIndexMap.end() && "String is not in table!");
184160814Ssimon  return I->second;
185160814Ssimon}
186160814Ssimon
187160814Ssimonsize_t StringTableBuilder::add(CachedHashStringRef S) {
188160814Ssimon  if (K == WinCOFF)
189160814Ssimon    assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
190160814Ssimon
191160814Ssimon  assert(!isFinalized());
192160814Ssimon  auto P = StringIndexMap.insert(std::make_pair(S, 0));
193160814Ssimon  if (P.second) {
194160814Ssimon    size_t Start = alignTo(Size, Alignment);
195160814Ssimon    P.first->second = Start;
196160814Ssimon    Size = Start + S.size() + (K != RAW);
197160814Ssimon  }
198160814Ssimon  return P.first->second;
199160814Ssimon}
200160814Ssimon