BitstreamReader.cpp revision 360784
1//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Bitstream/BitstreamReader.h"
10#include "llvm/ADT/StringRef.h"
11#include <cassert>
12#include <string>
13
14using namespace llvm;
15
16//===----------------------------------------------------------------------===//
17//  BitstreamCursor implementation
18//===----------------------------------------------------------------------===//
19
20/// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
21Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22  // Save the current block's state on BlockScope.
23  BlockScope.push_back(Block(CurCodeSize));
24  BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25
26  // Add the abbrevs specific to this block to the CurAbbrevs list.
27  if (BlockInfo) {
28    if (const BitstreamBlockInfo::BlockInfo *Info =
29            BlockInfo->getBlockInfo(BlockID)) {
30      CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
31                        Info->Abbrevs.end());
32    }
33  }
34
35  // Get the codesize of this block.
36  Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
37  if (!MaybeVBR)
38    return MaybeVBR.takeError();
39  CurCodeSize = MaybeVBR.get();
40
41  if (CurCodeSize > MaxChunkSize)
42    return llvm::createStringError(
43        std::errc::illegal_byte_sequence,
44        "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
45        CurCodeSize);
46
47  SkipToFourByteBoundary();
48  Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
49  if (!MaybeNum)
50    return MaybeNum.takeError();
51  word_t NumWords = MaybeNum.get();
52  if (NumWordsP)
53    *NumWordsP = NumWords;
54
55  if (CurCodeSize == 0)
56    return llvm::createStringError(
57        std::errc::illegal_byte_sequence,
58        "can't enter sub-block: current code size is 0");
59  if (AtEndOfStream())
60    return llvm::createStringError(
61        std::errc::illegal_byte_sequence,
62        "can't enter sub block: already at end of stream");
63
64  return Error::success();
65}
66
67static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
68                                               const BitCodeAbbrevOp &Op) {
69  assert(!Op.isLiteral() && "Not to be used with literals!");
70
71  // Decode the value as we are commanded.
72  switch (Op.getEncoding()) {
73  case BitCodeAbbrevOp::Array:
74  case BitCodeAbbrevOp::Blob:
75    llvm_unreachable("Should not reach here");
76  case BitCodeAbbrevOp::Fixed:
77    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
78    return Cursor.Read((unsigned)Op.getEncodingData());
79  case BitCodeAbbrevOp::VBR:
80    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81    return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
82  case BitCodeAbbrevOp::Char6:
83    if (Expected<unsigned> Res = Cursor.Read(6))
84      return BitCodeAbbrevOp::DecodeChar6(Res.get());
85    else
86      return Res.takeError();
87  }
88  llvm_unreachable("invalid abbreviation encoding");
89}
90
91/// skipRecord - Read the current record and discard it.
92Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
93  // Skip unabbreviated records by reading past their entries.
94  if (AbbrevID == bitc::UNABBREV_RECORD) {
95    Expected<uint32_t> MaybeCode = ReadVBR(6);
96    if (!MaybeCode)
97      return MaybeCode.takeError();
98    unsigned Code = MaybeCode.get();
99    Expected<uint32_t> MaybeVBR = ReadVBR(6);
100    if (!MaybeVBR)
101      return MaybeVBR.get();
102    unsigned NumElts = MaybeVBR.get();
103    for (unsigned i = 0; i != NumElts; ++i)
104      if (Expected<uint64_t> Res = ReadVBR64(6))
105        ; // Skip!
106      else
107        return Res.takeError();
108    return Code;
109  }
110
111  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
112  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
113  unsigned Code;
114  if (CodeOp.isLiteral())
115    Code = CodeOp.getLiteralValue();
116  else {
117    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
118        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
119      return llvm::createStringError(
120          std::errc::illegal_byte_sequence,
121          "Abbreviation starts with an Array or a Blob");
122    Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
123    if (!MaybeCode)
124      return MaybeCode.takeError();
125    Code = MaybeCode.get();
126  }
127
128  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
129    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
130    if (Op.isLiteral())
131      continue;
132
133    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
134        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
135      if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
136        continue;
137      else
138        return MaybeField.takeError();
139    }
140
141    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
142      // Array case.  Read the number of elements as a vbr6.
143      Expected<uint32_t> MaybeNum = ReadVBR(6);
144      if (!MaybeNum)
145        return MaybeNum.takeError();
146      unsigned NumElts = MaybeNum.get();
147
148      // Get the element encoding.
149      assert(i+2 == e && "array op not second to last?");
150      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
151
152      // Read all the elements.
153      // Decode the value as we are commanded.
154      switch (EltEnc.getEncoding()) {
155      default:
156        report_fatal_error("Array element type can't be an Array or a Blob");
157      case BitCodeAbbrevOp::Fixed:
158        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
159        if (Error Err = JumpToBit(GetCurrentBitNo() +
160                                  NumElts * EltEnc.getEncodingData()))
161          return std::move(Err);
162        break;
163      case BitCodeAbbrevOp::VBR:
164        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
165        for (; NumElts; --NumElts)
166          if (Expected<uint64_t> Res =
167                  ReadVBR64((unsigned)EltEnc.getEncodingData()))
168            ; // Skip!
169          else
170            return Res.takeError();
171        break;
172      case BitCodeAbbrevOp::Char6:
173        if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
174          return std::move(Err);
175        break;
176      }
177      continue;
178    }
179
180    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
181    // Blob case.  Read the number of bytes as a vbr6.
182    Expected<uint32_t> MaybeNum = ReadVBR(6);
183    if (!MaybeNum)
184      return MaybeNum.takeError();
185    unsigned NumElts = MaybeNum.get();
186    SkipToFourByteBoundary();  // 32-bit alignment
187
188    // Figure out where the end of this blob will be including tail padding.
189    size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
190
191    // If this would read off the end of the bitcode file, just set the
192    // record to empty and return.
193    if (!canSkipToPos(NewEnd/8)) {
194      skipToEnd();
195      break;
196    }
197
198    // Skip over the blob.
199    if (Error Err = JumpToBit(NewEnd))
200      return std::move(Err);
201  }
202  return Code;
203}
204
205Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
206                                               SmallVectorImpl<uint64_t> &Vals,
207                                               StringRef *Blob) {
208  if (AbbrevID == bitc::UNABBREV_RECORD) {
209    Expected<uint32_t> MaybeCode = ReadVBR(6);
210    if (!MaybeCode)
211      return MaybeCode.takeError();
212    uint32_t Code = MaybeCode.get();
213    Expected<uint32_t> MaybeNumElts = ReadVBR(6);
214    if (!MaybeNumElts)
215      return MaybeNumElts.takeError();
216    uint32_t NumElts = MaybeNumElts.get();
217
218    for (unsigned i = 0; i != NumElts; ++i)
219      if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
220        Vals.push_back(MaybeVal.get());
221      else
222        return MaybeVal.takeError();
223    return Code;
224  }
225
226  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
227
228  // Read the record code first.
229  assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
230  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
231  unsigned Code;
232  if (CodeOp.isLiteral())
233    Code = CodeOp.getLiteralValue();
234  else {
235    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
236        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
237      report_fatal_error("Abbreviation starts with an Array or a Blob");
238    if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
239      Code = MaybeCode.get();
240    else
241      return MaybeCode.takeError();
242  }
243
244  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
245    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
246    if (Op.isLiteral()) {
247      Vals.push_back(Op.getLiteralValue());
248      continue;
249    }
250
251    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
252        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
253      if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
254        Vals.push_back(MaybeVal.get());
255      else
256        return MaybeVal.takeError();
257      continue;
258    }
259
260    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
261      // Array case.  Read the number of elements as a vbr6.
262      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
263      if (!MaybeNumElts)
264        return MaybeNumElts.takeError();
265      uint32_t NumElts = MaybeNumElts.get();
266
267      // Get the element encoding.
268      if (i + 2 != e)
269        report_fatal_error("Array op not second to last");
270      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
271      if (!EltEnc.isEncoding())
272        report_fatal_error(
273            "Array element type has to be an encoding of a type");
274
275      // Read all the elements.
276      switch (EltEnc.getEncoding()) {
277      default:
278        report_fatal_error("Array element type can't be an Array or a Blob");
279      case BitCodeAbbrevOp::Fixed:
280        for (; NumElts; --NumElts)
281          if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
282                  Read((unsigned)EltEnc.getEncodingData()))
283            Vals.push_back(MaybeVal.get());
284          else
285            return MaybeVal.takeError();
286        break;
287      case BitCodeAbbrevOp::VBR:
288        for (; NumElts; --NumElts)
289          if (Expected<uint64_t> MaybeVal =
290                  ReadVBR64((unsigned)EltEnc.getEncodingData()))
291            Vals.push_back(MaybeVal.get());
292          else
293            return MaybeVal.takeError();
294        break;
295      case BitCodeAbbrevOp::Char6:
296        for (; NumElts; --NumElts)
297          if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
298            Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
299          else
300            return MaybeVal.takeError();
301      }
302      continue;
303    }
304
305    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
306    // Blob case.  Read the number of bytes as a vbr6.
307    Expected<uint32_t> MaybeNumElts = ReadVBR(6);
308    if (!MaybeNumElts)
309      return MaybeNumElts.takeError();
310    uint32_t NumElts = MaybeNumElts.get();
311    SkipToFourByteBoundary();  // 32-bit alignment
312
313    // Figure out where the end of this blob will be including tail padding.
314    size_t CurBitPos = GetCurrentBitNo();
315    size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
316
317    // If this would read off the end of the bitcode file, just set the
318    // record to empty and return.
319    if (!canSkipToPos(NewEnd/8)) {
320      Vals.append(NumElts, 0);
321      skipToEnd();
322      break;
323    }
324
325    // Otherwise, inform the streamer that we need these bytes in memory.  Skip
326    // over tail padding first, in case jumping to NewEnd invalidates the Blob
327    // pointer.
328    if (Error Err = JumpToBit(NewEnd))
329      return std::move(Err);
330    const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
331
332    // If we can return a reference to the data, do so to avoid copying it.
333    if (Blob) {
334      *Blob = StringRef(Ptr, NumElts);
335    } else {
336      // Otherwise, unpack into Vals with zero extension.
337      for (; NumElts; --NumElts)
338        Vals.push_back((unsigned char)*Ptr++);
339    }
340  }
341
342  return Code;
343}
344
345Error BitstreamCursor::ReadAbbrevRecord() {
346  auto Abbv = std::make_shared<BitCodeAbbrev>();
347  Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
348  if (!MaybeNumOpInfo)
349    return MaybeNumOpInfo.takeError();
350  unsigned NumOpInfo = MaybeNumOpInfo.get();
351  for (unsigned i = 0; i != NumOpInfo; ++i) {
352    Expected<word_t> MaybeIsLiteral = Read(1);
353    if (!MaybeIsLiteral)
354      return MaybeIsLiteral.takeError();
355    bool IsLiteral = MaybeIsLiteral.get();
356    if (IsLiteral) {
357      Expected<uint64_t> MaybeOp = ReadVBR64(8);
358      if (!MaybeOp)
359        return MaybeOp.takeError();
360      Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
361      continue;
362    }
363
364    Expected<word_t> MaybeEncoding = Read(3);
365    if (!MaybeEncoding)
366      return MaybeEncoding.takeError();
367    BitCodeAbbrevOp::Encoding E =
368        (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
369    if (BitCodeAbbrevOp::hasEncodingData(E)) {
370      Expected<uint64_t> MaybeData = ReadVBR64(5);
371      if (!MaybeData)
372        return MaybeData.takeError();
373      uint64_t Data = MaybeData.get();
374
375      // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
376      // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
377      // a slow path in Read() to have to handle reading zero bits.
378      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
379          Data == 0) {
380        Abbv->Add(BitCodeAbbrevOp(0));
381        continue;
382      }
383
384      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
385          Data > MaxChunkSize)
386        report_fatal_error(
387            "Fixed or VBR abbrev record with size > MaxChunkData");
388
389      Abbv->Add(BitCodeAbbrevOp(E, Data));
390    } else
391      Abbv->Add(BitCodeAbbrevOp(E));
392  }
393
394  if (Abbv->getNumOperandInfos() == 0)
395    report_fatal_error("Abbrev record with no operands");
396  CurAbbrevs.push_back(std::move(Abbv));
397
398  return Error::success();
399}
400
401Expected<Optional<BitstreamBlockInfo>>
402BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
403  if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
404    return std::move(Err);
405
406  BitstreamBlockInfo NewBlockInfo;
407
408  SmallVector<uint64_t, 64> Record;
409  BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
410
411  // Read all the records for this module.
412  while (true) {
413    Expected<BitstreamEntry> MaybeEntry =
414        advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
415    if (!MaybeEntry)
416      return MaybeEntry.takeError();
417    BitstreamEntry Entry = MaybeEntry.get();
418
419    switch (Entry.Kind) {
420    case llvm::BitstreamEntry::SubBlock: // Handled for us already.
421    case llvm::BitstreamEntry::Error:
422      return None;
423    case llvm::BitstreamEntry::EndBlock:
424      return std::move(NewBlockInfo);
425    case llvm::BitstreamEntry::Record:
426      // The interesting case.
427      break;
428    }
429
430    // Read abbrev records, associate them with CurBID.
431    if (Entry.ID == bitc::DEFINE_ABBREV) {
432      if (!CurBlockInfo) return None;
433      if (Error Err = ReadAbbrevRecord())
434        return std::move(Err);
435
436      // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
437      // appropriate BlockInfo.
438      CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
439      CurAbbrevs.pop_back();
440      continue;
441    }
442
443    // Read a record.
444    Record.clear();
445    Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
446    if (!MaybeBlockInfo)
447      return MaybeBlockInfo.takeError();
448    switch (MaybeBlockInfo.get()) {
449    default:
450      break; // Default behavior, ignore unknown content.
451    case bitc::BLOCKINFO_CODE_SETBID:
452      if (Record.size() < 1)
453        return None;
454      CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
455      break;
456    case bitc::BLOCKINFO_CODE_BLOCKNAME: {
457      if (!CurBlockInfo)
458        return None;
459      if (!ReadBlockInfoNames)
460        break; // Ignore name.
461      std::string Name;
462      for (unsigned i = 0, e = Record.size(); i != e; ++i)
463        Name += (char)Record[i];
464      CurBlockInfo->Name = Name;
465      break;
466    }
467      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
468        if (!CurBlockInfo) return None;
469        if (!ReadBlockInfoNames)
470          break; // Ignore name.
471        std::string Name;
472        for (unsigned i = 1, e = Record.size(); i != e; ++i)
473          Name += (char)Record[i];
474        CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
475                                                           Name));
476        break;
477      }
478      }
479  }
480}
481