1//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Windows-specific.
10// A parser for the module-definition file (.def file).
11//
// The format of module-definition files is described in this document:
13// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Object/COFFModuleDefinition.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/Object/COFFImportFile.h"
21#include "llvm/Object/Error.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/Path.h"
24
25using namespace llvm::COFF;
26using namespace llvm;
27
28namespace llvm {
29namespace object {
30
/// Categories of tokens produced when lexing a module-definition file.
enum Kind {
  // Sentinels and free-form text.
  Unknown,    // Default kind of a default-constructed Token.
  Eof,        // End of input.
  Identifier, // Any bare or quoted word that is not a keyword.

  // Punctuation.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords; each is matched by its exact upper-case spelling.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
50
51struct Token {
52  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
53  Kind K;
54  StringRef Value;
55};
56
57static bool isDecorated(StringRef Sym, bool MingwDef) {
58  // In def files, the symbols can either be listed decorated or undecorated.
59  //
60  // - For cdecl symbols, only the undecorated form is allowed.
61  // - For fastcall and vectorcall symbols, both fully decorated or
62  //   undecorated forms can be present.
63  // - For stdcall symbols in non-MinGW environments, the decorated form is
64  //   fully decorated with leading underscore and trailing stack argument
65  //   size - like "_Func@0".
66  // - In MinGW def files, a decorated stdcall symbol does not include the
67  //   leading underscore though, like "Func@0".
68
69  // This function controls whether a leading underscore should be added to
70  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73  // as decorated, i.e. don't add any more leading underscores.
74  // We can't check for a leading underscore here, since function names
75  // themselves can start with an underscore, while a second one still needs
76  // to be added.
77  return Sym.starts_with("@") || Sym.contains("@@") || Sym.starts_with("?") ||
78         (!MingwDef && Sym.contains('@'));
79}
80
81class Lexer {
82public:
83  Lexer(StringRef S) : Buf(S) {}
84
85  Token lex() {
86    Buf = Buf.trim();
87    if (Buf.empty())
88      return Token(Eof);
89
90    switch (Buf[0]) {
91    case '\0':
92      return Token(Eof);
93    case ';': {
94      size_t End = Buf.find('\n');
95      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96      return lex();
97    }
98    case '=':
99      Buf = Buf.drop_front();
100      if (Buf.starts_with("=")) {
101        Buf = Buf.drop_front();
102        return Token(EqualEqual, "==");
103      }
104      return Token(Equal, "=");
105    case ',':
106      Buf = Buf.drop_front();
107      return Token(Comma, ",");
108    case '"': {
109      StringRef S;
110      std::tie(S, Buf) = Buf.substr(1).split('"');
111      return Token(Identifier, S);
112    }
113    default: {
114      size_t End = Buf.find_first_of("=,;\r\n \t\v");
115      StringRef Word = Buf.substr(0, End);
116      Kind K = llvm::StringSwitch<Kind>(Word)
117                   .Case("BASE", KwBase)
118                   .Case("CONSTANT", KwConstant)
119                   .Case("DATA", KwData)
120                   .Case("EXPORTS", KwExports)
121                   .Case("HEAPSIZE", KwHeapsize)
122                   .Case("LIBRARY", KwLibrary)
123                   .Case("NAME", KwName)
124                   .Case("NONAME", KwNoname)
125                   .Case("PRIVATE", KwPrivate)
126                   .Case("STACKSIZE", KwStacksize)
127                   .Case("VERSION", KwVersion)
128                   .Default(Identifier);
129      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130      return Token(K, Word);
131    }
132    }
133  }
134
135private:
136  StringRef Buf;
137};
138
139class Parser {
140public:
141  explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
142      : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
143    if (Machine != IMAGE_FILE_MACHINE_I386)
144      AddUnderscores = false;
145  }
146
147  Expected<COFFModuleDefinition> parse() {
148    do {
149      if (Error Err = parseOne())
150        return std::move(Err);
151    } while (Tok.K != Eof);
152    return Info;
153  }
154
155private:
156  void read() {
157    if (Stack.empty()) {
158      Tok = Lex.lex();
159      return;
160    }
161    Tok = Stack.back();
162    Stack.pop_back();
163  }
164
165  Error readAsInt(uint64_t *I) {
166    read();
167    if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
168      return createError("integer expected");
169    return Error::success();
170  }
171
172  Error expect(Kind Expected, StringRef Msg) {
173    read();
174    if (Tok.K != Expected)
175      return createError(Msg);
176    return Error::success();
177  }
178
179  void unget() { Stack.push_back(Tok); }
180
181  Error parseOne() {
182    read();
183    switch (Tok.K) {
184    case Eof:
185      return Error::success();
186    case KwExports:
187      for (;;) {
188        read();
189        if (Tok.K != Identifier) {
190          unget();
191          return Error::success();
192        }
193        if (Error Err = parseExport())
194          return Err;
195      }
196    case KwHeapsize:
197      return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
198    case KwStacksize:
199      return parseNumbers(&Info.StackReserve, &Info.StackCommit);
200    case KwLibrary:
201    case KwName: {
202      bool IsDll = Tok.K == KwLibrary; // Check before parseName.
203      std::string Name;
204      if (Error Err = parseName(&Name, &Info.ImageBase))
205        return Err;
206
207      Info.ImportName = Name;
208
209      // Set the output file, but don't override /out if it was already passed.
210      if (Info.OutputFile.empty()) {
211        Info.OutputFile = Name;
212        // Append the appropriate file extension if not already present.
213        if (!sys::path::has_extension(Name))
214          Info.OutputFile += IsDll ? ".dll" : ".exe";
215      }
216
217      return Error::success();
218    }
219    case KwVersion:
220      return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
221    default:
222      return createError("unknown directive: " + Tok.Value);
223    }
224  }
225
226  Error parseExport() {
227    COFFShortExport E;
228    E.Name = std::string(Tok.Value);
229    read();
230    if (Tok.K == Equal) {
231      read();
232      if (Tok.K != Identifier)
233        return createError("identifier expected, but got " + Tok.Value);
234      E.ExtName = E.Name;
235      E.Name = std::string(Tok.Value);
236    } else {
237      unget();
238    }
239
240    if (AddUnderscores) {
241      if (!isDecorated(E.Name, MingwDef))
242        E.Name = (std::string("_").append(E.Name));
243      if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
244        E.ExtName = (std::string("_").append(E.ExtName));
245    }
246
247    for (;;) {
248      read();
249      if (Tok.K == Identifier && Tok.Value[0] == '@') {
250        if (Tok.Value == "@") {
251          // "foo @ 10"
252          read();
253          Tok.Value.getAsInteger(10, E.Ordinal);
254        } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
255          // "foo \n @bar" - Not an ordinal modifier at all, but the next
256          // export (fastcall decorated) - complete the current one.
257          unget();
258          Info.Exports.push_back(E);
259          return Error::success();
260        }
261        // "foo @10"
262        read();
263        if (Tok.K == KwNoname) {
264          E.Noname = true;
265        } else {
266          unget();
267        }
268        continue;
269      }
270      if (Tok.K == KwData) {
271        E.Data = true;
272        continue;
273      }
274      if (Tok.K == KwConstant) {
275        E.Constant = true;
276        continue;
277      }
278      if (Tok.K == KwPrivate) {
279        E.Private = true;
280        continue;
281      }
282      if (Tok.K == EqualEqual) {
283        read();
284        E.AliasTarget = std::string(Tok.Value);
285        if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
286          E.AliasTarget = std::string("_").append(E.AliasTarget);
287        continue;
288      }
289      unget();
290      Info.Exports.push_back(E);
291      return Error::success();
292    }
293  }
294
295  // HEAPSIZE/STACKSIZE reserve[,commit]
296  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
297    if (Error Err = readAsInt(Reserve))
298      return Err;
299    read();
300    if (Tok.K != Comma) {
301      unget();
302      Commit = nullptr;
303      return Error::success();
304    }
305    if (Error Err = readAsInt(Commit))
306      return Err;
307    return Error::success();
308  }
309
310  // NAME outputPath [BASE=address]
311  Error parseName(std::string *Out, uint64_t *Baseaddr) {
312    read();
313    if (Tok.K == Identifier) {
314      *Out = std::string(Tok.Value);
315    } else {
316      *Out = "";
317      unget();
318      return Error::success();
319    }
320    read();
321    if (Tok.K == KwBase) {
322      if (Error Err = expect(Equal, "'=' expected"))
323        return Err;
324      if (Error Err = readAsInt(Baseaddr))
325        return Err;
326    } else {
327      unget();
328      *Baseaddr = 0;
329    }
330    return Error::success();
331  }
332
333  // VERSION major[.minor]
334  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
335    read();
336    if (Tok.K != Identifier)
337      return createError("identifier expected, but got " + Tok.Value);
338    StringRef V1, V2;
339    std::tie(V1, V2) = Tok.Value.split('.');
340    if (V1.getAsInteger(10, *Major))
341      return createError("integer expected, but got " + Tok.Value);
342    if (V2.empty())
343      *Minor = 0;
344    else if (V2.getAsInteger(10, *Minor))
345      return createError("integer expected, but got " + Tok.Value);
346    return Error::success();
347  }
348
349  Lexer Lex;
350  Token Tok;
351  std::vector<Token> Stack;
352  MachineTypes Machine;
353  COFFModuleDefinition Info;
354  bool MingwDef;
355  bool AddUnderscores;
356};
357
358Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
359                                                         MachineTypes Machine,
360                                                         bool MingwDef,
361                                                         bool AddUnderscores) {
362  return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
363}
364
365} // namespace object
366} // namespace llvm
367