// FormatString.h revision 263508
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26class TargetInfo; 27 28//===----------------------------------------------------------------------===// 29/// Common components of both fprintf and fscanf format strings. 30namespace analyze_format_string { 31 32/// Class representing optional flags with location and representation 33/// information. 34class OptionalFlag { 35public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 LLVM_EXPLICIT operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57private: 58 const char *representation; 59 const char *position; 60 bool flag; 61}; 62 63/// Represents the length modifier in a format string in scanf/printf. 
64class LengthModifier { 65public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsInt32, // 'I32' (MSVCRT, like __int32) 77 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 78 AsInt64, // 'I64' (MSVCRT, like __int64) 79 AsLongDouble, // 'L' 80 AsAllocate, // for '%as', GNU extension to C90 scanf 81 AsMAllocate, // for '%ms', GNU extension to scanf 82 AsWideChar = AsLong // for '%ls', only makes sense for printf 83 }; 84 85 LengthModifier() 86 : Position(0), kind(None) {} 87 LengthModifier(const char *pos, Kind k) 88 : Position(pos), kind(k) {} 89 90 const char *getStart() const { 91 return Position; 92 } 93 94 unsigned getLength() const { 95 switch (kind) { 96 default: 97 return 1; 98 case AsLongLong: 99 case AsChar: 100 return 2; 101 case AsInt32: 102 case AsInt64: 103 return 3; 104 case None: 105 return 0; 106 } 107 } 108 109 Kind getKind() const { return kind; } 110 void setKind(Kind k) { kind = k; } 111 112 const char *toString() const; 113 114private: 115 const char *Position; 116 Kind kind; 117}; 118 119class ConversionSpecifier { 120public: 121 enum Kind { 122 InvalidSpecifier = 0, 123 // C99 conversion specifiers. 124 cArg, 125 dArg, 126 DArg, // Apple extension 127 iArg, 128 IntArgBeg = dArg, IntArgEnd = iArg, 129 130 oArg, 131 OArg, // Apple extension 132 uArg, 133 UArg, // Apple extension 134 xArg, 135 XArg, 136 UIntArgBeg = oArg, UIntArgEnd = XArg, 137 138 fArg, 139 FArg, 140 eArg, 141 EArg, 142 gArg, 143 GArg, 144 aArg, 145 AArg, 146 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 147 148 sArg, 149 pArg, 150 nArg, 151 PercentArg, 152 CArg, 153 SArg, 154 155 // ** Printf-specific ** 156 157 // Objective-C specific specifiers. 
158 ObjCObjArg, // '@' 159 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 160 161 // FreeBSD specific specifiers 162 FreeBSDbArg, 163 FreeBSDDArg, 164 FreeBSDrArg, 165 166 // GlibC specific specifiers. 167 PrintErrno, // 'm' 168 169 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 170 171 // ** Scanf-specific ** 172 ScanListArg, // '[' 173 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 174 }; 175 176 ConversionSpecifier(bool isPrintf = true) 177 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 178 179 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 180 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 181 182 const char *getStart() const { 183 return Position; 184 } 185 186 StringRef getCharacters() const { 187 return StringRef(getStart(), getLength()); 188 } 189 190 bool consumesDataArgument() const { 191 switch (kind) { 192 case PrintErrno: 193 assert(IsPrintf); 194 return false; 195 case PercentArg: 196 return false; 197 default: 198 return true; 199 } 200 } 201 202 Kind getKind() const { return kind; } 203 void setKind(Kind k) { kind = k; } 204 unsigned getLength() const { 205 return EndScanList ? 
EndScanList - Position : 1; 206 } 207 208 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 209 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 210 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 211 const char *toString() const; 212 213 bool isPrintfKind() const { return IsPrintf; } 214 215 Optional<ConversionSpecifier> getStandardSpecifier() const; 216 217protected: 218 bool IsPrintf; 219 const char *Position; 220 const char *EndScanList; 221 Kind kind; 222}; 223 224class ArgType { 225public: 226 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 227 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 228private: 229 const Kind K; 230 QualType T; 231 const char *Name; 232 bool Ptr; 233public: 234 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 235 ArgType(QualType t, const char *n = 0) 236 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 237 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 238 239 static ArgType Invalid() { return ArgType(InvalidTy); } 240 bool isValid() const { return K != InvalidTy; } 241 242 /// Create an ArgType which corresponds to the type pointer to A. 
243 static ArgType PtrTo(const ArgType& A) { 244 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 245 ArgType Res = A; 246 Res.Ptr = true; 247 return Res; 248 } 249 250 bool matchesType(ASTContext &C, QualType argTy) const; 251 252 QualType getRepresentativeType(ASTContext &C) const; 253 254 std::string getRepresentativeTypeName(ASTContext &C) const; 255}; 256 257class OptionalAmount { 258public: 259 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 260 261 OptionalAmount(HowSpecified howSpecified, 262 unsigned amount, 263 const char *amountStart, 264 unsigned amountLength, 265 bool usesPositionalArg) 266 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 267 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 268 269 OptionalAmount(bool valid = true) 270 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 271 UsesPositionalArg(0), UsesDotPrefix(0) {} 272 273 bool isInvalid() const { 274 return hs == Invalid; 275 } 276 277 HowSpecified getHowSpecified() const { return hs; } 278 void setHowSpecified(HowSpecified h) { hs = h; } 279 280 bool hasDataArgument() const { return hs == Arg; } 281 282 unsigned getArgIndex() const { 283 assert(hasDataArgument()); 284 return amt; 285 } 286 287 unsigned getConstantAmount() const { 288 assert(hs == Constant); 289 return amt; 290 } 291 292 const char *getStart() const { 293 // We include the . character if it is given. 
294 return start - UsesDotPrefix; 295 } 296 297 unsigned getConstantLength() const { 298 assert(hs == Constant); 299 return length + UsesDotPrefix; 300 } 301 302 ArgType getArgType(ASTContext &Ctx) const; 303 304 void toString(raw_ostream &os) const; 305 306 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 307 unsigned getPositionalArgIndex() const { 308 assert(hasDataArgument()); 309 return amt + 1; 310 } 311 312 bool usesDotPrefix() const { return UsesDotPrefix; } 313 void setUsesDotPrefix() { UsesDotPrefix = true; } 314 315private: 316 const char *start; 317 unsigned length; 318 HowSpecified hs; 319 unsigned amt; 320 bool UsesPositionalArg : 1; 321 bool UsesDotPrefix; 322}; 323 324 325class FormatSpecifier { 326protected: 327 LengthModifier LM; 328 OptionalAmount FieldWidth; 329 ConversionSpecifier CS; 330 /// Positional arguments, an IEEE extension: 331 /// IEEE Std 1003.1, 2004 Edition 332 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 333 bool UsesPositionalArg; 334 unsigned argIndex; 335public: 336 FormatSpecifier(bool isPrintf) 337 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 338 339 void setLengthModifier(LengthModifier lm) { 340 LM = lm; 341 } 342 343 void setUsesPositionalArg() { UsesPositionalArg = true; } 344 345 void setArgIndex(unsigned i) { 346 argIndex = i; 347 } 348 349 unsigned getArgIndex() const { 350 return argIndex; 351 } 352 353 unsigned getPositionalArgIndex() const { 354 return argIndex + 1; 355 } 356 357 const LengthModifier &getLengthModifier() const { 358 return LM; 359 } 360 361 const OptionalAmount &getFieldWidth() const { 362 return FieldWidth; 363 } 364 365 void setFieldWidth(const OptionalAmount &Amt) { 366 FieldWidth = Amt; 367 } 368 369 bool usesPositionalArg() const { return UsesPositionalArg; } 370 371 bool hasValidLengthModifier(const TargetInfo &Target) const; 372 373 bool hasStandardLengthModifier() const; 374 375 Optional<LengthModifier> getCorrectedLengthModifier() 
const; 376 377 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 378 379 bool hasStandardLengthConversionCombination() const; 380 381 /// For a TypedefType QT, if it is a named integer type such as size_t, 382 /// assign the appropriate value to LM and return true. 383 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 384}; 385 386} // end analyze_format_string namespace 387 388//===----------------------------------------------------------------------===// 389/// Pieces specific to fprintf format strings. 390 391namespace analyze_printf { 392 393class PrintfConversionSpecifier : 394 public analyze_format_string::ConversionSpecifier { 395public: 396 PrintfConversionSpecifier() 397 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 398 399 PrintfConversionSpecifier(const char *pos, Kind k) 400 : ConversionSpecifier(true, pos, k) {} 401 402 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 403 bool isDoubleArg() const { return kind >= DoubleArgBeg && 404 kind <= DoubleArgEnd; } 405 unsigned getLength() const { 406 // Conversion specifiers currently only are represented by 407 // single characters, but we be flexible. 408 return 1; 409 } 410 411 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 412 return CS->isPrintfKind(); 413 } 414}; 415 416using analyze_format_string::ArgType; 417using analyze_format_string::LengthModifier; 418using analyze_format_string::OptionalAmount; 419using analyze_format_string::OptionalFlag; 420 421class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 422 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 
423 OptionalFlag IsLeftJustified; // '-' 424 OptionalFlag HasPlusPrefix; // '+' 425 OptionalFlag HasSpacePrefix; // ' ' 426 OptionalFlag HasAlternativeForm; // '#' 427 OptionalFlag HasLeadingZeroes; // '0' 428 OptionalAmount Precision; 429public: 430 PrintfSpecifier() : 431 FormatSpecifier(/* isPrintf = */ true), 432 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 433 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 434 435 static PrintfSpecifier Parse(const char *beg, const char *end); 436 437 // Methods for incrementally constructing the PrintfSpecifier. 438 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 439 CS = cs; 440 } 441 void setHasThousandsGrouping(const char *position) { 442 HasThousandsGrouping = true; 443 HasThousandsGrouping.setPosition(position); 444 } 445 void setIsLeftJustified(const char *position) { 446 IsLeftJustified = true; 447 IsLeftJustified.setPosition(position); 448 } 449 void setHasPlusPrefix(const char *position) { 450 HasPlusPrefix = true; 451 HasPlusPrefix.setPosition(position); 452 } 453 void setHasSpacePrefix(const char *position) { 454 HasSpacePrefix = true; 455 HasSpacePrefix.setPosition(position); 456 } 457 void setHasAlternativeForm(const char *position) { 458 HasAlternativeForm = true; 459 HasAlternativeForm.setPosition(position); 460 } 461 void setHasLeadingZeros(const char *position) { 462 HasLeadingZeroes = true; 463 HasLeadingZeroes.setPosition(position); 464 } 465 void setUsesPositionalArg() { UsesPositionalArg = true; } 466 467 // Methods for querying the format specifier. 
468 469 const PrintfConversionSpecifier &getConversionSpecifier() const { 470 return cast<PrintfConversionSpecifier>(CS); 471 } 472 473 void setPrecision(const OptionalAmount &Amt) { 474 Precision = Amt; 475 Precision.setUsesDotPrefix(); 476 } 477 478 const OptionalAmount &getPrecision() const { 479 return Precision; 480 } 481 482 bool consumesDataArgument() const { 483 return getConversionSpecifier().consumesDataArgument(); 484 } 485 486 /// \brief Returns the builtin type that a data argument 487 /// paired with this format specifier should have. This method 488 /// will return null if the format specifier does not have 489 /// a matching data argument or the matching argument matches 490 /// more than one type. 491 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 492 493 const OptionalFlag &hasThousandsGrouping() const { 494 return HasThousandsGrouping; 495 } 496 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 497 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 498 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 499 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 500 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 501 bool usesPositionalArg() const { return UsesPositionalArg; } 502 503 /// Changes the specifier and length according to a QualType, retaining any 504 /// flags or options. Returns true on success, or false when a conversion 505 /// was not successful. 
506 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 507 bool IsObjCLiteral); 508 509 void toString(raw_ostream &os) const; 510 511 // Validation methods - to check if any element results in undefined behavior 512 bool hasValidPlusPrefix() const; 513 bool hasValidAlternativeForm() const; 514 bool hasValidLeadingZeros() const; 515 bool hasValidSpacePrefix() const; 516 bool hasValidLeftJustified() const; 517 bool hasValidThousandsGroupingPrefix() const; 518 519 bool hasValidPrecision() const; 520 bool hasValidFieldWidth() const; 521}; 522} // end analyze_printf namespace 523 524//===----------------------------------------------------------------------===// 525/// Pieces specific to fscanf format strings. 526 527namespace analyze_scanf { 528 529class ScanfConversionSpecifier : 530 public analyze_format_string::ConversionSpecifier { 531public: 532 ScanfConversionSpecifier() 533 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 534 535 ScanfConversionSpecifier(const char *pos, Kind k) 536 : ConversionSpecifier(false, pos, k) {} 537 538 void setEndScanList(const char *pos) { EndScanList = pos; } 539 540 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 541 return !CS->isPrintfKind(); 542 } 543}; 544 545using analyze_format_string::ArgType; 546using analyze_format_string::LengthModifier; 547using analyze_format_string::OptionalAmount; 548using analyze_format_string::OptionalFlag; 549 550class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 551 OptionalFlag SuppressAssignment; // '*' 552public: 553 ScanfSpecifier() : 554 FormatSpecifier(/* isPrintf = */ false), 555 SuppressAssignment("*") {} 556 557 void setSuppressAssignment(const char *position) { 558 SuppressAssignment = true; 559 SuppressAssignment.setPosition(position); 560 } 561 562 const OptionalFlag &getSuppressAssignment() const { 563 return SuppressAssignment; 564 } 565 566 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 567 CS 
= cs; 568 } 569 570 const ScanfConversionSpecifier &getConversionSpecifier() const { 571 return cast<ScanfConversionSpecifier>(CS); 572 } 573 574 bool consumesDataArgument() const { 575 return CS.consumesDataArgument() && !SuppressAssignment; 576 } 577 578 ArgType getArgType(ASTContext &Ctx) const; 579 580 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 581 582 void toString(raw_ostream &os) const; 583 584 static ScanfSpecifier Parse(const char *beg, const char *end); 585}; 586 587} // end analyze_scanf namespace 588 589//===----------------------------------------------------------------------===// 590// Parsing and processing of format strings (both fprintf and fscanf). 591 592namespace analyze_format_string { 593 594enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 595 596class FormatStringHandler { 597public: 598 FormatStringHandler() {} 599 virtual ~FormatStringHandler(); 600 601 virtual void HandleNullChar(const char *nullCharacter) {} 602 603 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 604 605 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 606 PositionContext p) {} 607 608 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 609 610 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 611 unsigned specifierLen) {} 612 613 // Printf-specific handlers. 614 615 virtual bool HandleInvalidPrintfConversionSpecifier( 616 const analyze_printf::PrintfSpecifier &FS, 617 const char *startSpecifier, 618 unsigned specifierLen) { 619 return true; 620 } 621 622 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 623 const char *startSpecifier, 624 unsigned specifierLen) { 625 return true; 626 } 627 628 // Scanf-specific handlers. 
629 630 virtual bool HandleInvalidScanfConversionSpecifier( 631 const analyze_scanf::ScanfSpecifier &FS, 632 const char *startSpecifier, 633 unsigned specifierLen) { 634 return true; 635 } 636 637 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 638 const char *startSpecifier, 639 unsigned specifierLen) { 640 return true; 641 } 642 643 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 644}; 645 646bool ParsePrintfString(FormatStringHandler &H, 647 const char *beg, const char *end, const LangOptions &LO, 648 const TargetInfo &Target); 649 650bool ParseScanfString(FormatStringHandler &H, 651 const char *beg, const char *end, const LangOptions &LO, 652 const TargetInfo &Target); 653 654} // end analyze_format_string namespace 655} // end clang namespace 656#endif 657