// FormatString.h revision 263508
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// A flag starts out cleared and without a recorded position.  A position
/// has to be supplied through setPosition() before getPosition() is used.
class OptionalFlag {
public:
  /// \param Representation the textual spelling of the flag; it is returned
  /// unchanged by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  This is const-qualified so
  /// that it can be called on a const OptionalFlag (or a const reference to
  /// one).
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag was found.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts when no position has been
  /// recorded yet (the stored pointer is null until setPosition() runs).
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  LLVM_EXPLICIT operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;  // Null until setPosition() has been called.
  bool flag;
};
62
/// Describes the length modifier of a scanf/printf conversion: which
/// modifier was written and where it starts in the format string.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier: kind None and a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier (null when default
  /// constructed).
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for the
  /// two-letter modifiers, 3 for 'I32'/'I64', and 1 for everything else.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
118
119class ConversionSpecifier {
120public:
121  enum Kind {
122    InvalidSpecifier = 0,
123      // C99 conversion specifiers.
124    cArg,
125    dArg,
126    DArg, // Apple extension
127    iArg,
128    IntArgBeg = dArg, IntArgEnd = iArg,
129
130    oArg,
131    OArg, // Apple extension
132    uArg,
133    UArg, // Apple extension
134    xArg,
135    XArg,
136    UIntArgBeg = oArg, UIntArgEnd = XArg,
137
138    fArg,
139    FArg,
140    eArg,
141    EArg,
142    gArg,
143    GArg,
144    aArg,
145    AArg,
146    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
147
148    sArg,
149    pArg,
150    nArg,
151    PercentArg,
152    CArg,
153    SArg,
154
155    // ** Printf-specific **
156
157    // Objective-C specific specifiers.
158    ObjCObjArg,  // '@'
159    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
160
161    // FreeBSD specific specifiers
162    FreeBSDbArg,
163    FreeBSDDArg,
164    FreeBSDrArg,
165
166    // GlibC specific specifiers.
167    PrintErrno,   // 'm'
168
169    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
170
171    // ** Scanf-specific **
172    ScanListArg, // '['
173    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
174  };
175
176  ConversionSpecifier(bool isPrintf = true)
177    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
178
179  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
180    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
181
182  const char *getStart() const {
183    return Position;
184  }
185
186  StringRef getCharacters() const {
187    return StringRef(getStart(), getLength());
188  }
189
190  bool consumesDataArgument() const {
191    switch (kind) {
192      case PrintErrno:
193        assert(IsPrintf);
194        return false;
195      case PercentArg:
196        return false;
197      default:
198        return true;
199    }
200  }
201
202  Kind getKind() const { return kind; }
203  void setKind(Kind k) { kind = k; }
204  unsigned getLength() const {
205    return EndScanList ? EndScanList - Position : 1;
206  }
207
208  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
209  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
210  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
211  const char *toString() const;
212
213  bool isPrintfKind() const { return IsPrintf; }
214
215  Optional<ConversionSpecifier> getStandardSpecifier() const;
216
217protected:
218  bool IsPrintf;
219  const char *Position;
220  const char *EndScanList;
221  Kind kind;
222};
223
224class ArgType {
225public:
226  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
227              AnyCharTy, CStrTy, WCStrTy, WIntTy };
228private:
229  const Kind K;
230  QualType T;
231  const char *Name;
232  bool Ptr;
233public:
234  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
235  ArgType(QualType t, const char *n = 0)
236      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
237  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
238
239  static ArgType Invalid() { return ArgType(InvalidTy); }
240  bool isValid() const { return K != InvalidTy; }
241
242  /// Create an ArgType which corresponds to the type pointer to A.
243  static ArgType PtrTo(const ArgType& A) {
244    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
245    ArgType Res = A;
246    Res.Ptr = true;
247    return Res;
248  }
249
250  bool matchesType(ASTContext &C, QualType argTy) const;
251
252  QualType getRepresentativeType(ASTContext &C) const;
253
254  std::string getRepresentativeTypeName(ASTContext &C) const;
255};
256
257class OptionalAmount {
258public:
259  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
260
261  OptionalAmount(HowSpecified howSpecified,
262                 unsigned amount,
263                 const char *amountStart,
264                 unsigned amountLength,
265                 bool usesPositionalArg)
266  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
267  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
268
269  OptionalAmount(bool valid = true)
270  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
271  UsesPositionalArg(0), UsesDotPrefix(0) {}
272
273  bool isInvalid() const {
274    return hs == Invalid;
275  }
276
277  HowSpecified getHowSpecified() const { return hs; }
278  void setHowSpecified(HowSpecified h) { hs = h; }
279
280  bool hasDataArgument() const { return hs == Arg; }
281
282  unsigned getArgIndex() const {
283    assert(hasDataArgument());
284    return amt;
285  }
286
287  unsigned getConstantAmount() const {
288    assert(hs == Constant);
289    return amt;
290  }
291
292  const char *getStart() const {
293      // We include the . character if it is given.
294    return start - UsesDotPrefix;
295  }
296
297  unsigned getConstantLength() const {
298    assert(hs == Constant);
299    return length + UsesDotPrefix;
300  }
301
302  ArgType getArgType(ASTContext &Ctx) const;
303
304  void toString(raw_ostream &os) const;
305
306  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
307  unsigned getPositionalArgIndex() const {
308    assert(hasDataArgument());
309    return amt + 1;
310  }
311
312  bool usesDotPrefix() const { return UsesDotPrefix; }
313  void setUsesDotPrefix() { UsesDotPrefix = true; }
314
315private:
316  const char *start;
317  unsigned length;
318  HowSpecified hs;
319  unsigned amt;
320  bool UsesPositionalArg : 1;
321  bool UsesDotPrefix;
322};
323
324
325class FormatSpecifier {
326protected:
327  LengthModifier LM;
328  OptionalAmount FieldWidth;
329  ConversionSpecifier CS;
330  /// Positional arguments, an IEEE extension:
331  ///  IEEE Std 1003.1, 2004 Edition
332  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
333  bool UsesPositionalArg;
334  unsigned argIndex;
335public:
336  FormatSpecifier(bool isPrintf)
337    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
338
339  void setLengthModifier(LengthModifier lm) {
340    LM = lm;
341  }
342
343  void setUsesPositionalArg() { UsesPositionalArg = true; }
344
345  void setArgIndex(unsigned i) {
346    argIndex = i;
347  }
348
349  unsigned getArgIndex() const {
350    return argIndex;
351  }
352
353  unsigned getPositionalArgIndex() const {
354    return argIndex + 1;
355  }
356
357  const LengthModifier &getLengthModifier() const {
358    return LM;
359  }
360
361  const OptionalAmount &getFieldWidth() const {
362    return FieldWidth;
363  }
364
365  void setFieldWidth(const OptionalAmount &Amt) {
366    FieldWidth = Amt;
367  }
368
369  bool usesPositionalArg() const { return UsesPositionalArg; }
370
371  bool hasValidLengthModifier(const TargetInfo &Target) const;
372
373  bool hasStandardLengthModifier() const;
374
375  Optional<LengthModifier> getCorrectedLengthModifier() const;
376
377  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
378
379  bool hasStandardLengthConversionCombination() const;
380
381  /// For a TypedefType QT, if it is a named integer type such as size_t,
382  /// assign the appropriate value to LM and return true.
383  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
384};
385
386} // end analyze_format_string namespace
387
388//===----------------------------------------------------------------------===//
389/// Pieces specific to fprintf format strings.
390
391namespace analyze_printf {
392
393class PrintfConversionSpecifier :
394  public analyze_format_string::ConversionSpecifier  {
395public:
396  PrintfConversionSpecifier()
397    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
398
399  PrintfConversionSpecifier(const char *pos, Kind k)
400    : ConversionSpecifier(true, pos, k) {}
401
402  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
403  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
404                                    kind <= DoubleArgEnd; }
405  unsigned getLength() const {
406      // Conversion specifiers currently only are represented by
407      // single characters, but we be flexible.
408    return 1;
409  }
410
411  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
412    return CS->isPrintfKind();
413  }
414};
415
416using analyze_format_string::ArgType;
417using analyze_format_string::LengthModifier;
418using analyze_format_string::OptionalAmount;
419using analyze_format_string::OptionalFlag;
420
421class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
422  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
423  OptionalFlag IsLeftJustified; // '-'
424  OptionalFlag HasPlusPrefix; // '+'
425  OptionalFlag HasSpacePrefix; // ' '
426  OptionalFlag HasAlternativeForm; // '#'
427  OptionalFlag HasLeadingZeroes; // '0'
428  OptionalAmount Precision;
429public:
430  PrintfSpecifier() :
431    FormatSpecifier(/* isPrintf = */ true),
432    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
433    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
434
435  static PrintfSpecifier Parse(const char *beg, const char *end);
436
437    // Methods for incrementally constructing the PrintfSpecifier.
438  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
439    CS = cs;
440  }
441  void setHasThousandsGrouping(const char *position) {
442    HasThousandsGrouping = true;
443    HasThousandsGrouping.setPosition(position);
444  }
445  void setIsLeftJustified(const char *position) {
446    IsLeftJustified = true;
447    IsLeftJustified.setPosition(position);
448  }
449  void setHasPlusPrefix(const char *position) {
450    HasPlusPrefix = true;
451    HasPlusPrefix.setPosition(position);
452  }
453  void setHasSpacePrefix(const char *position) {
454    HasSpacePrefix = true;
455    HasSpacePrefix.setPosition(position);
456  }
457  void setHasAlternativeForm(const char *position) {
458    HasAlternativeForm = true;
459    HasAlternativeForm.setPosition(position);
460  }
461  void setHasLeadingZeros(const char *position) {
462    HasLeadingZeroes = true;
463    HasLeadingZeroes.setPosition(position);
464  }
465  void setUsesPositionalArg() { UsesPositionalArg = true; }
466
467    // Methods for querying the format specifier.
468
469  const PrintfConversionSpecifier &getConversionSpecifier() const {
470    return cast<PrintfConversionSpecifier>(CS);
471  }
472
473  void setPrecision(const OptionalAmount &Amt) {
474    Precision = Amt;
475    Precision.setUsesDotPrefix();
476  }
477
478  const OptionalAmount &getPrecision() const {
479    return Precision;
480  }
481
482  bool consumesDataArgument() const {
483    return getConversionSpecifier().consumesDataArgument();
484  }
485
486  /// \brief Returns the builtin type that a data argument
487  /// paired with this format specifier should have.  This method
488  /// will return null if the format specifier does not have
489  /// a matching data argument or the matching argument matches
490  /// more than one type.
491  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
492
493  const OptionalFlag &hasThousandsGrouping() const {
494      return HasThousandsGrouping;
495  }
496  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
497  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
498  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
499  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
500  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
501  bool usesPositionalArg() const { return UsesPositionalArg; }
502
503  /// Changes the specifier and length according to a QualType, retaining any
504  /// flags or options. Returns true on success, or false when a conversion
505  /// was not successful.
506  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
507               bool IsObjCLiteral);
508
509  void toString(raw_ostream &os) const;
510
511  // Validation methods - to check if any element results in undefined behavior
512  bool hasValidPlusPrefix() const;
513  bool hasValidAlternativeForm() const;
514  bool hasValidLeadingZeros() const;
515  bool hasValidSpacePrefix() const;
516  bool hasValidLeftJustified() const;
517  bool hasValidThousandsGroupingPrefix() const;
518
519  bool hasValidPrecision() const;
520  bool hasValidFieldWidth() const;
521};
522}  // end analyze_printf namespace
523
524//===----------------------------------------------------------------------===//
525/// Pieces specific to fscanf format strings.
526
527namespace analyze_scanf {
528
529class ScanfConversionSpecifier :
530    public analyze_format_string::ConversionSpecifier  {
531public:
532  ScanfConversionSpecifier()
533    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
534
535  ScanfConversionSpecifier(const char *pos, Kind k)
536    : ConversionSpecifier(false, pos, k) {}
537
538  void setEndScanList(const char *pos) { EndScanList = pos; }
539
540  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
541    return !CS->isPrintfKind();
542  }
543};
544
545using analyze_format_string::ArgType;
546using analyze_format_string::LengthModifier;
547using analyze_format_string::OptionalAmount;
548using analyze_format_string::OptionalFlag;
549
550class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
551  OptionalFlag SuppressAssignment; // '*'
552public:
553  ScanfSpecifier() :
554    FormatSpecifier(/* isPrintf = */ false),
555    SuppressAssignment("*") {}
556
557  void setSuppressAssignment(const char *position) {
558    SuppressAssignment = true;
559    SuppressAssignment.setPosition(position);
560  }
561
562  const OptionalFlag &getSuppressAssignment() const {
563    return SuppressAssignment;
564  }
565
566  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
567    CS = cs;
568  }
569
570  const ScanfConversionSpecifier &getConversionSpecifier() const {
571    return cast<ScanfConversionSpecifier>(CS);
572  }
573
574  bool consumesDataArgument() const {
575    return CS.consumesDataArgument() && !SuppressAssignment;
576  }
577
578  ArgType getArgType(ASTContext &Ctx) const;
579
580  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
581
582  void toString(raw_ostream &os) const;
583
584  static ScanfSpecifier Parse(const char *beg, const char *end);
585};
586
587} // end analyze_scanf namespace
588
589//===----------------------------------------------------------------------===//
590// Parsing and processing of format strings (both fprintf and fscanf).
591
592namespace analyze_format_string {
593
/// Distinguishes the field-width position from the precision position; passed
/// to FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
595
596class FormatStringHandler {
597public:
598  FormatStringHandler() {}
599  virtual ~FormatStringHandler();
600
601  virtual void HandleNullChar(const char *nullCharacter) {}
602
603  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
604
605  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
606                                     PositionContext p) {}
607
608  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
609
610  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
611                                         unsigned specifierLen) {}
612
613  // Printf-specific handlers.
614
615  virtual bool HandleInvalidPrintfConversionSpecifier(
616                                      const analyze_printf::PrintfSpecifier &FS,
617                                      const char *startSpecifier,
618                                      unsigned specifierLen) {
619    return true;
620  }
621
622  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
623                                     const char *startSpecifier,
624                                     unsigned specifierLen) {
625    return true;
626  }
627
628    // Scanf-specific handlers.
629
630  virtual bool HandleInvalidScanfConversionSpecifier(
631                                        const analyze_scanf::ScanfSpecifier &FS,
632                                        const char *startSpecifier,
633                                        unsigned specifierLen) {
634    return true;
635  }
636
637  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
638                                    const char *startSpecifier,
639                                    unsigned specifierLen) {
640    return true;
641  }
642
643  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
644};
645
646bool ParsePrintfString(FormatStringHandler &H,
647                       const char *beg, const char *end, const LangOptions &LO,
648                       const TargetInfo &Target);
649
650bool ParseScanfString(FormatStringHandler &H,
651                      const char *beg, const char *end, const LangOptions &LO,
652                      const TargetInfo &Target);
653
654} // end analyze_format_string namespace
655} // end clang namespace
656#endif
657