//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
struct X86Operand;

// Binding strength for each InfixCalculatorTok, indexed by the enum value.
// Higher values bind more tightly; parentheses get the extreme values so the
// operator-stack comparison in pushOperator() handles them specially.
static const char OpPrecedence[] = {
  0, // IC_PLUS
  0, // IC_MINUS
  1, // IC_MULTIPLY
  1, // IC_DIVIDE
  2, // IC_RPAREN
  3, // IC_LPAREN
  0, // IC_IMM
  0  // IC_REGISTER
};

class X86AsmParser : public MCTargetAsmParser {
  MCSubtargetInfo &STI;
  MCAsmParser &Parser;
  ParseInstructionInfo *InstInfo;
private:
  // Token kinds used by the infix-to-postfix calculator below.
  enum InfixCalculatorTok {
    IC_PLUS = 0,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER
  };

  /// InfixCalculator - Converts a stream of infix operators/operands into a
  /// postfix stack (shunting-yard style) and evaluates it in execute().
  /// Operands are 64-bit immediates or register placeholders.
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

  public:
    /// popOperand - Pop and return the value of the most recently pushed
    /// operand (immediate or register) from the postfix stack.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
              && "Expected and immediate or register!");
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back_val(); }
    /// pushOperator - Add an operator, first flushing any higher-or-equal
    /// precedence operators from the operator stack to the postfix stack.
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (1) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back_val();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back_val();
        } else {
          // Ordinary operator: move it to the postfix stack.
          InfixOperatorStack.pop_back_val();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }
    /// execute - Flush any pending operators, then evaluate the postfix
    /// stack with a classic operand-stack walk.  Returns 0 for an empty
    /// expression.  Multiply/divide are only valid between immediates.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };

  // Parser states for the Intel-syntax expression state machine below; the
  // state names mirror the token that was just consumed.
  enum IntelExprState {
    IES_PLUS,
    IES_MINUS,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };

  /// IntelExprStateMachine - Incrementally consumes the tokens of an
  /// Intel-syntax memory/immediate expression (e.g. [eax + ebx*4 + sym]),
  /// tracking base/index register, scale and symbol while folding constant
  /// arithmetic through an InfixCalculator.  Illegal token sequences drive
  /// the machine into IES_ERROR.
  class IntelExprStateMachine {
    IntelExprState State, PrevState;
    unsigned BaseReg, IndexReg, TmpReg, Scale;
    int64_t Imm;
    const MCExpr *Sym;
    StringRef SymName;
    bool StopOnLBrac, AddImmPrefix;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
  public:
    // Note: SymName is left default-constructed (empty) until
    // onIdentifierExpr() fills it in.
    IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
      State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
      Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
      AddImmPrefix(addimmprefix) { Info.clear(); }

    unsigned getBaseReg() { return BaseReg; }
    unsigned getIndexReg() { return IndexReg; }
    unsigned getScale() { return Scale; }
    const MCExpr *getSym() { return Sym; }
    StringRef getSymName() { return SymName; }
    // Total displacement: the seed immediate plus the folded expression.
    int64_t getImm() { return Imm + IC.execute(); }
    bool isValidEndState() { return State == IES_RBRAC; }
    bool getStopOnLBrac() { return StopOnLBrac; }
    bool getAddImmPrefix() { return AddImmPrefix; }
    bool hadError() { return State == IES_ERROR; }

    InlineAsmIdentifierInfo &getIdentifierInfo() {
      return Info;
    }

    void onPlus() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // a scale of 1.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            assert (!IndexReg && "BaseReg/IndexReg already set!");
            IndexReg = TmpReg;
            Scale = 1;
          }
        }
        break;
      }
      PrevState = CurrState;
    }
    void onMinus() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
        State = IES_MINUS;
        // Only push the minus operator if it is not a unary operator.
        if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
              CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
              CurrState == IES_LPAREN || CurrState == IES_LBRAC))
          IC.pushOperator(IC_MINUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // a scale of 1.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            assert (!IndexReg && "BaseReg/IndexReg already set!");
            IndexReg = TmpReg;
            Scale = 1;
          }
        }
        break;
      }
      PrevState = CurrState;
    }
    void onRegister(unsigned Reg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
        // Remember the register; whether it becomes Base or Index is decided
        // by the operator that follows it (see onPlus/onMinus/onRBrac).
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          assert (!IndexReg && "IndexReg already set!");
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
    }
    void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
        // The symbol contributes to the displacement; a zero immediate is
        // pushed so the calculator's arithmetic stays balanced.
        State = IES_INTEGER;
        Sym = SymRef;
        SymName = SymRefName;
        IC.pushOperand(IC_IMM);
        break;
      }
    }
    void onInteger(int64_t TmpInt) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_DIVIDE:
      case IES_MULTIPLY:
      case IES_LPAREN:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          assert (!IndexReg && "IndexReg already set!");
          IndexReg = TmpReg;
          Scale = TmpInt;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
                    PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
                    PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
                   CurrState == IES_MINUS) {
          // Unary minus.  No need to pop the minus operand because it was never
          // pushed.
          IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onLBrac() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
        // A second bracketed expression adds to the first, e.g. [x][y].
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        break;
      }
    }
    void onRBrac() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // a scale of 1.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            assert (!IndexReg && "BaseReg/IndexReg already set!");
            IndexReg = TmpReg;
            Scale = 1;
          }
        }
        break;
      }
      PrevState = CurrState;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_LPAREN:
        // FIXME: We don't handle this type of unary minus, yet.
        if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
            PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
            PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
            CurrState == IES_MINUS) {
          State = IES_ERROR;
          break;
        }
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
  };

  MCAsmParser &getParser() const { return Parser; }

  MCAsmLexer &getLexer() const { return Parser.getLexer(); }

  // When matching MS-style inline asm, errors are suppressed (the return
  // value alone signals failure); otherwise forward to the parser's
  // diagnostics.
  bool Error(SMLoc L, const Twine &Msg,
             ArrayRef<SMRange> Ranges = None,
             bool MatchingInlineAsm = false) {
    if (MatchingInlineAsm) return true;
    return Parser.Error(L, Msg, Ranges);
  }

  // Emit a diagnostic and return a null operand in one step.
  X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
    Error(Loc, Msg);
    return 0;
  }

  X86Operand *ParseOperand();
  X86Operand *ParseATTOperand();
  X86Operand *ParseIntelOperand();
  X86Operand *ParseIntelOffsetOfOperator();
  X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
  X86Operand *ParseIntelOperator(unsigned OpKind);
  X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
                                   SMLoc StartLoc);
  X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
                                       int64_t ImmDisp, unsigned Size);
  X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                   InlineAsmIdentifierInfo &Info,
                                   bool IsUnevaluatedOperand, SMLoc &End);

  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);

  X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                    unsigned BaseReg, unsigned IndexReg,
                                    unsigned Scale, SMLoc Start, SMLoc End,
                                    unsigned Size, StringRef Identifier,
                                    InlineAsmIdentifierInfo &Info);

  bool ParseDirectiveWord(unsigned Size, SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  bool processInstruction(MCInst &Inst,
                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                               MCStreamer &Out, unsigned &ErrorInfo,
                               bool MatchingInlineAsm);

  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
  /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
  bool isSrcOp(X86Operand &Op);

  /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
  bool isDstOp(X86Operand &Op);

  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
  }
  // Toggle between 16/32-bit and 64-bit mode, recomputing the feature set.
  void SwitchMode() {
    unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
    setAvailableFeatures(FB);
  }

  // Dialect 0 is AT&T; any non-zero dialect is treated as Intel syntax.
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
    : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }
  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);

  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  virtual bool ParseDirective(AsmToken DirectiveID);
};
} // end anonymous namespace

/// @name Auto-generated Match Functions
/// {

static unsigned MatchRegisterName(StringRef Name);

/// }

// The isImm* helpers below test whether a 64-bit immediate fits the
// sign/zero-extended sub-ranges the instruction encodings accept.  Values are
// compared as uint64_t, so the "negative" range appears as the high interval.
static bool isImmSExti16i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL)||
          (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}

static bool isImmSExti32i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL)||
          (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}

static bool isImmZExtu32u8Value(uint64_t Value) {
    return (Value <= 0x00000000000000FFULL);
}

static bool isImmSExti64i8Value(uint64_t Value) {
  return ((                                  Value <= 0x000000000000007FULL)||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}

static bool isImmSExti64i32Value(uint64_t Value) {
  return ((                                  Value <= 0x000000007FFFFFFFULL)||
          (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
namespace {

/// X86Operand - Instances of this class represent a parsed X86 machine
/// instruction.
struct X86Operand : public MCParsedAsmOperand {
  // Discriminator for the union below.
  enum KindTy {
    Token,
    Register,
    Immediate,
    Memory
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Location of the 'offset' operator, if any (MS inline asm); a default
  // (null) SMLoc means the operand did not use 'offset'.
  SMLoc OffsetOfLoc;
  StringRef SymName;
  void *OpDecl;
  bool AddressOf;

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct RegOp {
    unsigned RegNo;
  };

  struct ImmOp {
    const MCExpr *Val;
  };

  struct MemOp {
    unsigned SegReg;
    const MCExpr *Disp;
    unsigned BaseReg;
    unsigned IndexReg;
    unsigned Scale;
    unsigned Size;
  };

  // Only the member selected by Kind is valid at any time.
  union {
    struct TokOp Tok;
    struct RegOp Reg;
    struct ImmOp Imm;
    struct MemOp Mem;
  };

  X86Operand(KindTy K, SMLoc Start, SMLoc End)
    : Kind(K), StartLoc(Start), EndLoc(End) {}

  StringRef getSymName() { return SymName; }
  void *getOpDecl() { return OpDecl; }

  /// getStartLoc - Get the location of the first token of this operand.
  SMLoc getStartLoc() const { return StartLoc; }
  /// getEndLoc - Get the location of the last token of this operand.
  SMLoc getEndLoc() const { return EndLoc; }
  /// getLocRange - Get the range between the first and last token of this
  /// operand.
  SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
  /// getOffsetOfLoc - Get the location of the offset operator.
  SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }

  virtual void print(raw_ostream &OS) const {}

  StringRef getToken() const {
    assert(Kind == Token && "Invalid access!");
    return StringRef(Tok.Data, Tok.Length);
  }
  void setTokenValue(StringRef Value) {
    assert(Kind == Token && "Invalid access!");
    Tok.Data = Value.data();
    Tok.Length = Value.size();
  }

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return Reg.RegNo;
  }

  const MCExpr *getImm() const {
    assert(Kind == Immediate && "Invalid access!");
    return Imm.Val;
  }

  const MCExpr *getMemDisp() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.Disp;
  }
  unsigned getMemSegReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.SegReg;
  }
  unsigned getMemBaseReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.BaseReg;
  }
  unsigned getMemIndexReg() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.IndexReg;
  }
  unsigned getMemScale() const {
    assert(Kind == Memory && "Invalid access!");
    return Mem.Scale;
  }

  bool isToken() const {return Kind == Token; }

  bool isImm() const { return Kind == Immediate; }

  bool isImmSExti16i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    return isImmSExti16i8Value(CE->getValue());
  }
  bool isImmSExti32i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    return isImmSExti32i8Value(CE->getValue());
  }
  bool isImmZExtu32u8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    return isImmZExtu32u8Value(CE->getValue());
  }
  bool isImmSExti64i8() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    return isImmSExti64i8Value(CE->getValue());
  }
  bool isImmSExti64i32() const {
    if (!isImm())
      return false;

    // If this isn't a constant expr, just assume it fits and let relaxation
    // handle it.
    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
    if (!CE)
      return true;

    // Otherwise, check the value is in a range that makes sense for this
    // extension.
    return isImmSExti64i32Value(CE->getValue());
  }

  bool isOffsetOf() const {
    return OffsetOfLoc.getPointer();
  }

  bool needAddressOf() const {
    return AddressOf;
  }

  // Memory-size predicates: a size of 0 means "unsized" and matches any
  // width, so unsized operands are accepted by every isMemN().
  bool isMem() const { return Kind == Memory; }
  bool isMem8() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 8);
  }
  bool isMem16() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 16);
  }
  bool isMem32() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32);
  }
  bool isMem64() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64);
  }
  bool isMem80() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 80);
  }
  bool isMem128() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 128);
  }
  bool isMem256() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 256);
  }

  // Vector-index (gather/scatter style) memory operands: the index register
  // must be an XMM/YMM register.
  bool isMemVX32() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
  }
  bool isMemVY32() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
  }
  bool isMemVX64() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
  }
  bool isMemVY64() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
  }

  bool isAbsMem() const {
    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
      !getMemIndexReg() && getMemScale() == 1;
  }

  bool isReg() const { return Kind == Register; }

  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
    // Add as immediates when possible.
    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
    else
      Inst.addOperand(MCOperand::CreateExpr(Expr));
  }

  void addRegOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getReg()));
  }

  void addImmOperands(MCInst &Inst, unsigned N) const {
    assert(N == 1 && "Invalid number of operands!");
    addExpr(Inst, getImm());
  }

  // All sized/vector memory forms share the same 5-operand MCInst encoding;
  // the size distinction only matters for matching, so they all forward to
  // addMemOperands().
  void addMem8Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem16Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem32Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem64Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem80Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem128Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMem256Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMemVX32Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMemVY32Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMemVX64Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }
  void addMemVY64Operands(MCInst &Inst, unsigned N) const {
    addMemOperands(Inst, N);
  }

  // Emit the canonical X86 memory operand sequence:
  // base, scale, index, displacement, segment.
  void addMemOperands(MCInst &Inst, unsigned N) const {
    assert((N == 5) && "Invalid number of operands!");
    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
    addExpr(Inst, getMemDisp());
    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
  }

  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
    assert((N == 1) && "Invalid number of operands!");
    // Add as immediates when possible.
    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
    else
      Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
  }

  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
    SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
    X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    return Res;
  }

  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
                               bool AddressOf = false,
                               SMLoc OffsetOfLoc = SMLoc(),
                               StringRef SymName = StringRef(),
                               void *OpDecl = 0) {
    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
    Res->Reg.RegNo = RegNo;
    Res->AddressOf = AddressOf;
    Res->OffsetOfLoc = OffsetOfLoc;
    Res->SymName = SymName;
    Res->OpDecl = OpDecl;
    return Res;
  }

  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
    Res->Imm.Val = Val;
    return Res;
  }

  /// Create an absolute memory operand.
  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
                               unsigned Size = 0, StringRef SymName = StringRef(),
                               void *OpDecl = 0) {
    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
    Res->Mem.SegReg   = 0;
    Res->Mem.Disp     = Disp;
    Res->Mem.BaseReg  = 0;
    Res->Mem.IndexReg = 0;
    Res->Mem.Scale    = 1;
    Res->Mem.Size     = Size;
    Res->SymName      = SymName;
    Res->OpDecl       = OpDecl;
    Res->AddressOf    = false;
    return Res;
  }

  /// Create a generalized memory operand.
  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
                               unsigned Size = 0,
                               StringRef SymName = StringRef(),
                               void *OpDecl = 0) {
    // We should never just have a displacement, that should be parsed as an
    // absolute memory operand.
    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");

    // The scale should always be one of {1,2,4,8}.
    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
           "Invalid scale!");
    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
    Res->Mem.SegReg   = SegReg;
    Res->Mem.Disp     = Disp;
    Res->Mem.BaseReg  = BaseReg;
    Res->Mem.IndexReg = IndexReg;
    Res->Mem.Scale    = Scale;
    Res->Mem.Size     = Size;
    Res->SymName      = SymName;
    Res->OpDecl       = OpDecl;
    Res->AddressOf    = false;
    return Res;
  }
};

} // end anonymous namespace.

bool X86AsmParser::isSrcOp(X86Operand &Op) {
  unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;

  // Matches (%rsi)/(%esi) with an optional DS segment and zero displacement.
  return (Op.isMem() &&
    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
    isa<MCConstantExpr>(Op.Mem.Disp) &&
    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
}

bool X86AsmParser::isDstOp(X86Operand &Op) {
  unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;

  // Matches (%rdi)/(%edi) with an optional ES segment and zero displacement.
  return Op.isMem() &&
    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
    isa<MCConstantExpr>(Op.Mem.Disp) &&
    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
}

// Parse a register reference.  Returns false on success with RegNo set.
// On failure: in Intel syntax, returns true silently (the caller may retry
// the token as an identifier); in AT&T syntax, emits a diagnostic.
bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                 SMLoc &StartLoc, SMLoc &EndLoc) {
  RegNo = 0;
  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
    Parser.Lex(); // Eat percent token.

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  RegNo = MatchRegisterName(Tok.getString());

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(Tok.getString().lower());

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<In32BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // REX prefix.
    if (RegNo == X86::RIZ ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo))
      return Error(StartLoc, "register %"
                   + Tok.getString() + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
    RegNo = X86::ST0;
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (getLexer().isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    getParser().Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    }

    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  // If this is "db[0-7]", match it as an alias
  // for dr[0-7].
  if (RegNo == 0 && Tok.getString().size() == 3 &&
      Tok.getString().startswith("db")) {
    switch (Tok.getString()[2]) {
    case '0': RegNo = X86::DR0; break;
    case '1': RegNo = X86::DR1; break;
    case '2': RegNo = X86::DR2; break;
    case '3': RegNo = X86::DR3; break;
    case '4': RegNo = X86::DR4; break;
    case '5': RegNo = X86::DR5; break;
    case '6': RegNo = X86::DR6; break;
    case '7': RegNo = X86::DR7; break;
    }

    if (RegNo != 0) {
      EndLoc = Parser.getTok().getEndLoc();
      Parser.Lex(); // Eat it.
      return false;
    }
  }

  if (RegNo == 0) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

// Dispatch to the dialect-specific operand parser.
X86Operand *X86AsmParser::ParseOperand() {
  if (isParsingIntelSyntax())
    return ParseIntelOperand();
  return ParseATTOperand();
}

/// getIntelMemOperandSize - Return intel memory operand size.
/// getIntelMemOperandSize - Return the width in bits named by an Intel memory
/// operand size directive (e.g. "BYTE", "dword"), or 0 if OpStr is not a
/// recognized size keyword.
static unsigned getIntelMemOperandSize(StringRef OpStr) {
  unsigned Size = StringSwitch<unsigned>(OpStr)
    .Cases("BYTE", "byte", 8)
    .Cases("WORD", "word", 16)
    .Cases("DWORD", "dword", 32)
    .Cases("QWORD", "qword", 64)
    .Cases("XWORD", "xword", 80)
    .Cases("XMMWORD", "xmmword", 128)
    .Cases("YMMWORD", "ymmword", 256)
    .Default(0);
  return Size;
}

/// CreateMemForInlineAsm - Build the operand for a memory reference that came
/// from MS-style inline assembly.  Symbolic references to functions/labels are
/// turned into a register operand (so the matcher sees an 'r' constraint);
/// variable references become memory operands, inheriting their size from the
/// frontend type information when no explicit size directive was given.
X86Operand *
X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                    unsigned BaseReg, unsigned IndexReg,
                                    unsigned Scale, SMLoc Start, SMLoc End,
                                    unsigned Size, StringRef Identifier,
                                    InlineAsmIdentifierInfo &Info){
  if (isa<MCSymbolRefExpr>(Disp)) {
    // If this is not a VarDecl then assume it is a FuncDecl or some other label
    // reference.  We need an 'r' constraint here, so we need to create register
    // operand to ensure proper matching.  Just pick a GPR based on the size of
    // a pointer.
    if (!Info.IsVarDecl) {
      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
                                   SMLoc(), Identifier, Info.OpDecl);
    }
    if (!Size) {
      Size = Info.Type * 8; // Size is in terms of bits in this context.
      if (Size)
        // Record a rewrite so the IR string gets an explicit size directive.
        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
                                                    /*Len=*/0, Size));
    }
  }

  // When parsing inline assembly we set the base register to a non-zero value
  // if we don't know the actual value at this time.  This is necessary to
  // get the matching correct in some cases.
  BaseReg = BaseReg ? BaseReg : 1;
  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                               End, Size, Identifier, Info.OpDecl);
}

/// RewriteIntelBracExpression - Patch up the inline-asm rewrite list for a
/// bracketed Intel expression containing a symbol: strip the brackets,
/// fold any immediate displacement(s) into a single AOK_Imm rewrite, and skip
/// all text around the symbol so only the symbol name survives in the IR
/// string.
static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
                           StringRef SymName, int64_t ImmDisp,
                           int64_t FinalImmDisp, SMLoc &BracLoc,
                           SMLoc &StartInBrac, SMLoc &End) {
  // Remove the '[' and ']' from the IR string.
  AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
  AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));

  // If ImmDisp is non-zero, then we parsed a displacement before the
  // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]).
  // If ImmDisp doesn't match the displacement computed by the state machine
  // then we have an additional displacement in the bracketed expression.
  if (ImmDisp != FinalImmDisp) {
    if (ImmDisp) {
      // We have an immediate displacement before the bracketed expression.
      // Adjust this to match the final immediate displacement.
      bool Found = false;
      for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
             E = AsmRewrites->end(); I != E; ++I) {
        // Only consider rewrites located before the '['.
        if ((*I).Loc.getPointer() > BracLoc.getPointer())
          continue;
        if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
          assert (!Found && "ImmDisp already rewritten.");
          (*I).Kind = AOK_Imm;
          (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
          (*I).Val = FinalImmDisp;
          Found = true;
          break;
        }
      }
      assert (Found && "Unable to rewrite ImmDisp.");
    } else {
      // We have a symbolic and an immediate displacement, but no displacement
      // before the bracketed expression.  Put the immediate displacement
      // before the bracketed expression.
      AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
    }
  }
  // Remove all the ImmPrefix rewrites within the brackets.
  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
         E = AsmRewrites->end(); I != E; ++I) {
    if ((*I).Loc.getPointer() < StartInBrac.getPointer())
      continue;
    if ((*I).Kind == AOK_ImmPrefix)
      (*I).Kind = AOK_Delete;
  }
  const char *SymLocPtr = SymName.data();
  // Skip everything before the symbol.
  // NOTE(review): Len is unsigned, so the 'Len > 0' assert is tautological —
  // a negative pointer difference would wrap before the check.  Confirm the
  // symbol always lies inside the brackets.
  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
    assert(Len > 0 && "Expected a non-negative length.");
    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
  }
  // Skip everything after the symbol.
  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
    assert(Len > 0 && "Expected a non-negative length.");
    AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
  }
}

/// ParseIntelExpression - Feed tokens into the Intel expression state machine
/// until an end state (or error) is reached.  Returns 0 on success; on failure
/// returns an error operand.  End is updated to the end of the consumed text.
X86Operand *
X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  const AsmToken &Tok = Parser.getTok();

  bool Done = false;
  while (!Done) {
    bool UpdateLocLex = true;

    // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
    // identifier.  Don't try to parse it as a register.
    if (Tok.getString().startswith("."))
      break;

    // If we're parsing an immediate expression, we don't expect a '['.
    if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
      break;

    switch (getLexer().getKind()) {
    default: {
      if (SM.isValidEndState()) {
        Done = true;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
    }
    case AsmToken::EndOfStatement: {
      Done = true;
      break;
    }
    case AsmToken::Identifier: {
      // This could be a register or a symbolic displacement.
      unsigned TmpReg;
      const MCExpr *Val;
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      if (!ParseRegister(TmpReg, IdentLoc, End)) {
        SM.onRegister(TmpReg);
        UpdateLocLex = false;
        break;
      } else {
        if (!isParsingInlineAsm()) {
          if (getParser().parsePrimaryExpr(Val, End))
            return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
        } else {
          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
          if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                                     /*Unevaluated*/ false, End))
            return Err;
        }
        SM.onIdentifierExpr(Val, Identifier);
        UpdateLocLex = false;
        break;
      }
      // NOTE(review): unreachable — both branches above break or return.
      return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
    }
    case AsmToken::Integer:
      // For inline asm, remember where immediates occur so prefixes can be
      // inserted during rewriting.
      if (isParsingInlineAsm() && SM.getAddImmPrefix())
        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
                                                    Tok.getLoc()));
      SM.onInteger(Tok.getIntVal());
      break;
    case AsmToken::Plus:    SM.onPlus(); break;
    case AsmToken::Minus:   SM.onMinus(); break;
    case AsmToken::Star:    SM.onStar(); break;
    case AsmToken::Slash:   SM.onDivide(); break;
    case AsmToken::LBrac:   SM.onLBrac(); break;
    case AsmToken::RBrac:   SM.onRBrac(); break;
    case AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen:  SM.onRParen(); break;
    }
    if (SM.hadError())
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");

    if (!Done && UpdateLocLex) {
      End = Tok.getLoc();
      Parser.Lex(); // Consume the token.
    }
  }
  return 0;
}

/// ParseIntelBracExpression - Parse a bracketed Intel memory expression
/// "[ Symbol + ImmDisp ]" or "[ BaseReg + Scale*IndexReg + ImmDisp ]",
/// where the current token is the '['.  ImmDisp is any displacement already
/// parsed before the bracket.
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
                                                   int64_t ImmDisp,
                                                   unsigned Size) {
  const AsmToken &Tok = Parser.getTok();
  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
  if (getLexer().isNot(AsmToken::LBrac))
    return ErrorOperand(BracLoc, "Expected '[' token!");
  Parser.Lex(); // Eat '['

  SMLoc StartInBrac = Tok.getLoc();
  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
  // may have already parsed an immediate displacement before the bracketed
  // expression.
  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
  if (X86Operand *Err = ParseIntelExpression(SM, End))
    return Err;

  const MCExpr *Disp;
  if (const MCExpr *Sym = SM.getSym()) {
    // A symbolic displacement.
    Disp = Sym;
    if (isParsingInlineAsm())
      RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
                                 End);
  } else {
    // An immediate displacement only.
    Disp = MCConstantExpr::Create(SM.getImm(), getContext());
  }

  // Parse the dot operator (e.g., [ebx].foo.bar).
  if (Tok.getString().startswith(".")) {
    const MCExpr *NewDisp;
    if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
      return Err;

    End = Tok.getEndLoc();
    Parser.Lex(); // Eat the field.
    Disp = NewDisp;
  }

  int BaseReg = SM.getBaseReg();
  int IndexReg = SM.getIndexReg();
  int Scale = SM.getScale();
  if (!isParsingInlineAsm()) {
    // handle [-42]
    if (!BaseReg && !IndexReg) {
      if (!SegReg)
        return X86Operand::CreateMem(Disp, Start, End, Size);
      else
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
    }
    return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                                 End, Size);
  }

  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                               End, Size, SM.getSymName(), Info);
}

// Inline assembly may use variable names with namespace alias qualifiers.
// Asks the frontend (via SemaCallback) how much of the line is the identifier,
// then advances the lexer past it and returns a symbol reference for it.
X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                               StringRef &Identifier,
                                               InlineAsmIdentifierInfo &Info,
                                               bool IsUnevaluatedOperand,
                                               SMLoc &End) {
  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = 0;

  // LineBuf initially spans to the end of the buffer; the callback trims it
  // to the identifier's length.
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  while (true) {
    End = Tok.getEndLoc();
    getLexer().Lex();

    assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
    if (End.getPointer() == EndPtr) break;
  }

  // Create the symbol reference.
  Identifier = LineBuf;
  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
  return 0;
}

/// ParseIntelMemOperand - Parse intel style memory operand.
X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
                                               int64_t ImmDisp,
                                               SMLoc Start) {
  const AsmToken &Tok = Parser.getTok();
  SMLoc End;

  // Optional leading size directive, e.g. "dword ptr".
  unsigned Size = getIntelMemOperandSize(Tok.getString());
  if (Size) {
    Parser.Lex(); // Eat operand size (e.g., byte, word).
    if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
      return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
    Parser.Lex(); // Eat ptr.
  }

  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
  if (getLexer().is(AsmToken::Integer)) {
    if (isParsingInlineAsm())
      InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
                                                  Tok.getLoc()));
    // NOTE(review): this local shadows the ImmDisp parameter — intentional
    // here (the pre-bracket displacement replaces the incoming one), but
    // worth renaming for clarity.
    int64_t ImmDisp = Tok.getIntVal();
    Parser.Lex(); // Eat the integer.
    if (getLexer().isNot(AsmToken::LBrac))
      return ErrorOperand(Start, "Expected '[' token!");
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
  }

  if (getLexer().is(AsmToken::LBrac))
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);

  if (!ParseRegister(SegReg, Start, End)) {
    // Handle SegReg : [ ... ]
    if (getLexer().isNot(AsmToken::Colon))
      return ErrorOperand(Start, "Expected ':' token!");
    Parser.Lex(); // Eat :
    if (getLexer().isNot(AsmToken::LBrac))
      return ErrorOperand(Start, "Expected '[' token!");
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
  }

  // Otherwise it is a bare symbolic reference.
  const MCExpr *Val;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

/// Parse the '.' operator.  Computes NewDisp = Disp + field offset, where the
/// field offset comes either from a numeric ".Imm" (lexed as a real) or, in
/// inline asm, from a frontend field lookup.
X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                                const MCExpr *&NewDisp) {
  const AsmToken &Tok = Parser.getTok();
  int64_t OrigDispVal, DotDispVal;

  // FIXME: Handle non-constant expressions.
  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
    OrigDispVal = OrigDisp->getValue();
  else
    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");

  // Drop the '.'.
  StringRef DotDispStr = Tok.getString().drop_front(1);

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    DotDispStr.getAsInteger(10, DotDisp);
    DotDispVal = DotDisp.getZExtValue();
  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    unsigned DotDisp;
    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
                                           DotDisp))
      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
    DotDispVal = DotDisp;
  } else
    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");

  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    // Rewrite the dot-field text as the combined constant displacement.
    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
    unsigned Len = DotDispStr.size();
    unsigned Val = OrigDispVal + DotDispVal;
    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
                                                Val));
  }

  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
  return 0;
}

/// Parse the 'offset' operator.  This operator is used to specify the
/// location rather than the content of a variable.
X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
  const AsmToken &Tok = Parser.getTok();
  SMLoc OffsetOfLoc = Tok.getLoc();
  Parser.Lex(); // Eat offset.

  const MCExpr *Val;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;

  // Don't emit the offset operator.
  // NOTE(review): 7 presumably covers "offset" plus the following separator —
  // confirm against the rewriter's expectations.
  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));

  // The offset operator will have an 'r' constraint, thus we need to create
  // register operand to ensure proper matching.  Just pick a GPR based on
  // the size of a pointer.
  unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
  return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
                               OffsetOfLoc, Identifier, Info.OpDecl);
}

// Discriminates which MASM-style query operator is being parsed.
enum IntelOperatorKind {
  IOK_LENGTH,
  IOK_SIZE,
  IOK_TYPE
};

/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
/// returns the number of elements in an array.  It returns the value 1 for
/// non-array variables.  The SIZE operator returns the size of a C or C++
/// variable.  A variable's size is the product of its LENGTH and TYPE.  The
/// TYPE operator returns the size of a C or C++ type or variable.  If the
/// variable is an array, TYPE returns the size of a single element.
X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
  const AsmToken &Tok = Parser.getTok();
  SMLoc TypeLoc = Tok.getLoc();
  Parser.Lex(); // Eat operator.

  const MCExpr *Val = 0;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ true, End))
    return Err;

  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Length; break;
  case IOK_SIZE: CVal = Info.Size; break;
  case IOK_TYPE: CVal = Info.Type; break;
  }

  // Rewrite the type operator and the C or C++ type or variable in terms of an
  // immediate.  E.g. TYPE foo -> $$4
  unsigned Len = End.getPointer() - TypeLoc.getPointer();
  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));

  const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
  return X86Operand::CreateImm(Imm, Start, End);
}

/// ParseIntelOperand - Parse one Intel-syntax operand: a query operator
/// (inline asm only), an immediate expression, a register (possibly a segment
/// prefix), or a memory reference.
X86Operand *X86AsmParser::ParseIntelOperand() {
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start = Tok.getLoc(), End;

  // Offset, length, type and size operators.
  if (isParsingInlineAsm()) {
    StringRef AsmTokStr = Tok.getString();
    if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
      return ParseIntelOffsetOfOperator();
    if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
      return ParseIntelOperator(IOK_LENGTH);
    if (AsmTokStr == "size" || AsmTokStr == "SIZE")
      return ParseIntelOperator(IOK_SIZE);
    if (AsmTokStr == "type" || AsmTokStr == "TYPE")
      return ParseIntelOperator(IOK_TYPE);
  }

  // Immediate.
  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
      getLexer().is(AsmToken::LParen)) {
    AsmToken StartTok = Tok;
    IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
                             /*AddImmPrefix=*/false);
    if (X86Operand *Err = ParseIntelExpression(SM, End))
      return Err;

    int64_t Imm = SM.getImm();
    if (isParsingInlineAsm()) {
      unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
      if (StartTok.getString().size() == Len)
        // Just add a prefix if this wasn't a complex immediate expression.
        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
      else
        // Otherwise, rewrite the complex expression as a single immediate.
        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
    }

    if (getLexer().isNot(AsmToken::LBrac)) {
      const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
      return X86Operand::CreateImm(ImmExpr, Start, End);
    }

    // Only positive immediates are valid.
    if (Imm < 0)
      return ErrorOperand(Start, "expected a positive immediate displacement "
                          "before bracketed expr.");

    // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
    return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
  }

  // Register.
  unsigned RegNo = 0;
  if (!ParseRegister(RegNo, Start, End)) {
    // If this is a segment register followed by a ':', then this is the start
    // of a memory reference, otherwise this is a normal register reference.
    if (getLexer().isNot(AsmToken::Colon))
      return X86Operand::CreateReg(RegNo, Start, End);

    getParser().Lex(); // Eat the colon.
    return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
  }

  // Memory operand.
  return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
}

/// ParseATTOperand - Parse one AT&T-syntax operand: '%reg' (possibly a
/// segment prefix), '$imm', or a memory reference.
X86Operand *X86AsmParser::ParseATTOperand() {
  switch (getLexer().getKind()) {
  default:
    // Parse a memory operand with no segment register.
    return ParseMemOperand(0, Parser.getTok().getLoc());
  case AsmToken::Percent: {
    // Read the register.
    unsigned RegNo;
    SMLoc Start, End;
    if (ParseRegister(RegNo, Start, End)) return 0;
    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
      Error(Start, "%eiz and %riz can only be used as index registers",
            SMRange(Start, End));
      return 0;
    }

    // If this is a segment register followed by a ':', then this is the start
    // of a memory reference, otherwise this is a normal register reference.
    if (getLexer().isNot(AsmToken::Colon))
      return X86Operand::CreateReg(RegNo, Start, End);

    getParser().Lex(); // Eat the colon.
    return ParseMemOperand(RegNo, Start);
  }
  case AsmToken::Dollar: {
    // $42 -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    if (getParser().parseExpression(Val, End))
      return 0;
    return X86Operand::CreateImm(Val, Start, End);
  }
  }
}

/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {

  // We have to disambiguate a parenthesized expression "(4+5)" from the start
  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
  // only way to do this without lookahead is to eat the '(' and see what is
  // after it.
  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
  if (getLexer().isNot(AsmToken::LParen)) {
    SMLoc ExprEnd;
    if (getParser().parseExpression(Disp, ExprEnd)) return 0;

    // After parsing the base expression we could either have a parenthesized
    // memory address or not.  If not, return now.  If so, eat the (.
    if (getLexer().isNot(AsmToken::LParen)) {
      // Unless we have a segment register, treat this as an immediate.
      if (SegReg == 0)
        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
    }

    // Eat the '('.
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know if this is an expression or not, but
    // so we have to eat the ( to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be an parenthesized expression, parse it now.
      if (getParser().parseParenExpression(Disp, ExprEnd))
        return 0;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Unless we have a segment register, treat this as an immediate.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the ( of the memory operand.  Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc IndexLoc;

  if (getLexer().is(AsmToken::Percent)) {
    SMLoc StartLoc, EndLoc;
    if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(StartLoc, "eiz and riz can only be used as index registers",
            SMRange(StartLoc, EndLoc));
      return 0;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.
    IndexLoc = Parser.getTok().getLoc();

    // Following the comma we should have either an index register, or a scale
    // value.  We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support syntax
    // like "1(%eax,,1)", the assembler doesn't.  Use "eiz" or "riz" for this.
    if (getLexer().is(AsmToken::Percent)) {
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return 0;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return 0;
        }
        Parser.Lex(); // Eat the comma.

        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().parseAbsoluteExpression(ScaleVal)){
            Error(Loc, "expected scale expression");
            return 0;
          }

          // Validate the scale amount.
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return 0;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index is ignored.
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().parseAbsoluteExpression(Value))
        return 0;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (getLexer().isNot(AsmToken::RParen)) {
    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
    return 0;
  }
  SMLoc MemEnd = Parser.getTok().getEndLoc();
  Parser.Lex(); // Eat the ')'.

  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1807 if (BaseReg != 0 && IndexReg != 0) { 1808 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1809 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1810 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 1811 IndexReg != X86::RIZ) { 1812 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); 1813 return 0; 1814 } 1815 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1816 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1817 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 1818 IndexReg != X86::EIZ){ 1819 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); 1820 return 0; 1821 } 1822 } 1823 1824 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 1825 MemStart, MemEnd); 1826} 1827 1828bool X86AsmParser:: 1829ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, 1830 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 1831 InstInfo = &Info; 1832 StringRef PatchedName = Name; 1833 1834 // FIXME: Hack to recognize setneb as setne. 1835 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 1836 PatchedName != "setb" && PatchedName != "setnb") 1837 PatchedName = PatchedName.substr(0, Name.size()-1); 1838 1839 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 1840 const MCExpr *ExtraImmOp = 0; 1841 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 1842 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 1843 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 1844 bool IsVCMP = PatchedName[0] == 'v'; 1845 unsigned SSECCIdx = IsVCMP ? 
4 : 3; 1846 unsigned SSEComparisonCode = StringSwitch<unsigned>( 1847 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 1848 .Case("eq", 0x00) 1849 .Case("lt", 0x01) 1850 .Case("le", 0x02) 1851 .Case("unord", 0x03) 1852 .Case("neq", 0x04) 1853 .Case("nlt", 0x05) 1854 .Case("nle", 0x06) 1855 .Case("ord", 0x07) 1856 /* AVX only from here */ 1857 .Case("eq_uq", 0x08) 1858 .Case("nge", 0x09) 1859 .Case("ngt", 0x0A) 1860 .Case("false", 0x0B) 1861 .Case("neq_oq", 0x0C) 1862 .Case("ge", 0x0D) 1863 .Case("gt", 0x0E) 1864 .Case("true", 0x0F) 1865 .Case("eq_os", 0x10) 1866 .Case("lt_oq", 0x11) 1867 .Case("le_oq", 0x12) 1868 .Case("unord_s", 0x13) 1869 .Case("neq_us", 0x14) 1870 .Case("nlt_uq", 0x15) 1871 .Case("nle_uq", 0x16) 1872 .Case("ord_s", 0x17) 1873 .Case("eq_us", 0x18) 1874 .Case("nge_uq", 0x19) 1875 .Case("ngt_uq", 0x1A) 1876 .Case("false_os", 0x1B) 1877 .Case("neq_os", 0x1C) 1878 .Case("ge_oq", 0x1D) 1879 .Case("gt_oq", 0x1E) 1880 .Case("true_us", 0x1F) 1881 .Default(~0U); 1882 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { 1883 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 1884 getParser().getContext()); 1885 if (PatchedName.endswith("ss")) { 1886 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 1887 } else if (PatchedName.endswith("sd")) { 1888 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 1889 } else if (PatchedName.endswith("ps")) { 1890 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 1891 } else { 1892 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 1893 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 1894 } 1895 } 1896 } 1897 1898 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 1899 1900 if (ExtraImmOp && !isParsingIntelSyntax()) 1901 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1902 1903 // Determine whether this is an instruction prefix. 
  // Mnemonics that are bare prefixes: when one of these stands alone on a
  // statement we must not try to attach the following mnemonic's operands
  // to it (see the comment below).
  bool isPrefix =
    Name == "lock" || Name == "rep" ||
    Name == "repe" || Name == "repz" ||
    Name == "repne" || Name == "repnz" ||
    Name == "rex64" || Name == "data16";


  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {

    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star)) {
      SMLoc Loc = Parser.getTok().getLoc();
      Operands.push_back(X86Operand::CreateToken("*", Loc));
      Parser.Lex(); // Eat the star.
    }

    // Read the first operand.  On failure, discard the rest of the
    // statement so the parser resynchronizes at the next one.
    if (X86Operand *Op = ParseOperand())
      Operands.push_back(Op);
    else {
      Parser.eatToEndOfStatement();
      return true;
    }

    while (getLexer().is(AsmToken::Comma)) {
      Parser.Lex();  // Eat the comma.

      // Parse and remember the operand.
      if (X86Operand *Op = ParseOperand())
        Operands.push_back(Op);
      else {
        Parser.eatToEndOfStatement();
        return true;
      }
    }

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      SMLoc Loc = getLexer().getLoc();
      Parser.eatToEndOfStatement();
      return Error(Loc, "unexpected token in argument list");
    }
  }

  if (getLexer().is(AsmToken::EndOfStatement))
    Parser.Lex(); // Consume the EndOfStatement
  else if (isPrefix && getLexer().is(AsmToken::Slash))
    Parser.Lex(); // Consume the prefix separator Slash

  // In Intel syntax some mnemonics carry an implicit extra immediate
  // (ExtraImmOp was set earlier in this function); append it here.
  if (ExtraImmOp && isParsingIntelSyntax())
    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));

  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  // Replace the (%dx) memory operand (seg 0, disp 0, no index) with a plain
  // %dx register operand.
  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
      Operands.size() == 3) {
    X86Operand &Op = *(X86Operand*)Operands.back();
    if (Op.isMem() && Op.Mem.SegReg == 0 &&
        isa<MCConstantExpr>(Op.Mem.Disp) &&
        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
      SMLoc Loc = Op.getEndLoc();
      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
      delete &Op;
    }
  }
  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
      Operands.size() == 3) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    if (Op.isMem() && Op.Mem.SegReg == 0 &&
        isa<MCConstantExpr>(Op.Mem.Disp) &&
        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
      SMLoc Loc = Op.getEndLoc();
      Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
      delete &Op;
    }
  }
  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
  // (drop the implicit operands; the no-operand form encodes them).
  if (Name.startswith("ins") && Operands.size() == 3 &&
      (Name == "insb" || Name == "insw" || Name == "insl")) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }

  // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
  if (Name.startswith("outs") && Operands.size() == 3 &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }

  // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
  if (Name.startswith("movs") && Operands.size() == 3 &&
      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
       (is64BitMode() && Name == "movsq"))) {
    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
    if (isSrcOp(Op) && isDstOp(Op2)) {
      Operands.pop_back();
      Operands.pop_back();
      delete &Op;
      delete &Op2;
    }
  }
  // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
  // For the unsuffixed "lods", the suffix is inferred from the width of the
  // destination accumulator register; mismatched suffix/register pairs fall
  // through with ins == NULL and are left untouched for the matcher.
  if (Name.startswith("lods") && Operands.size() == 3 &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
    if (isSrcOp(*Op1) && Op2->isReg()) {
      const char *ins;
      unsigned reg = Op2->getReg();
      bool isLods = Name == "lods";
      if (reg == X86::AL && (isLods || Name == "lodsb"))
        ins = "lodsb";
      else if (reg == X86::AX && (isLods || Name == "lodsw"))
        ins = "lodsw";
      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
        ins = "lodsl";
      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
        ins = "lodsq";
      else
        ins = NULL;
      if (ins != NULL) {
        Operands.pop_back();
        Operands.pop_back();
        delete Op1;
        delete Op2;
        if (Name != ins)
          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
      }
    }
  }
  // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
  // Mirror of the lods canonicalization above, keyed on the source register.
  if (Name.startswith("stos") && Operands.size() == 3 &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
    if (isDstOp(*Op2) && Op1->isReg()) {
      const char *ins;
      unsigned reg = Op1->getReg();
      bool isStos = Name == "stos";
      if (reg == X86::AL && (isStos || Name == "stosb"))
        ins = "stosb";
      else if (reg == X86::AX && (isStos || Name == "stosw"))
        ins = "stosw";
      else if (reg == X86::EAX && (isStos || Name == "stosl"))
        ins = "stosl";
      else if (reg == X86::RAX && (isStos || Name == "stosq"))
        ins = "stosq";
      else
        ins = NULL;
      if (ins != NULL) {
        Operands.pop_back();
        Operands.pop_back();
        delete Op1;
        delete Op2;
        if (Name != ins)
          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
      }
    }
  }

  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
  // "shift <op>".  A constant shift count of 1 is dropped so the matcher
  // selects the short shift-by-one encoding.  Note the immediate is the
  // last operand in Intel syntax but the first in AT&T syntax.
  if ((Name.startswith("shr") || Name.startswith("sar") ||
       Name.startswith("shl") || Name.startswith("sal") ||
       Name.startswith("rcl") || Name.startswith("rcr") ||
       Name.startswith("rol") || Name.startswith("ror")) &&
      Operands.size() == 3) {
    if (isParsingIntelSyntax()) {
      // Intel syntax
      X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
          cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
        delete Operands[2];
        Operands.pop_back();
      }
    } else {
      X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
          cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
        delete Operands[1];
        Operands.erase(Operands.begin() + 1);
      }
    }
  }

  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
  // instalias with an immediate operand yet.
  if (Name == "int" && Operands.size() == 2) {
    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
        cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
      delete Operands[1];
      Operands.erase(Operands.begin() + 1);
      static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
    }
  }

  return false;
}

/// Rewrite an accumulator-with-immediate ALU instruction (one immediate
/// operand) into the given register/sign-extended-imm8 opcode form.
/// For non-compare instructions the register appears twice (destination and
/// source); compares only read it.  Always returns true so callers of
/// processInstruction loop again, allowing transformations to chain.
static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
                            bool isCmp) {
  MCInst TmpInst;
  TmpInst.setOpcode(Opcode);
  if (!isCmp)
    TmpInst.addOperand(MCOperand::CreateReg(Reg));
  TmpInst.addOperand(MCOperand::CreateReg(Reg));
  TmpInst.addOperand(Inst.getOperand(0));
  Inst = TmpInst;
  return true;
}

/// Shrink "op imm16, %ax" to the ri8 form when the immediate is expressible
/// as a sign-extended 8-bit value; returns false (no change) otherwise.
static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
    return false;

  return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
}

/// 32-bit analogue of convert16i16to16ri8 (accumulator %eax).
static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
    return false;

  return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
}

/// 64-bit analogue of convert16i16to16ri8 (accumulator %rax).
static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
                                bool isCmp = false) {
  if (!Inst.getOperand(0).isImm() ||
      !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
    return false;

  return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
}

/// Post-match peephole: returns true if Inst was changed (the caller loops
/// until it returns false).  Currently only shrinks ALU accumulator/imm
/// forms to their sign-extended-imm8 encodings when the immediate fits.
bool X86AsmParser::
processInstruction(MCInst &Inst,
                   const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
  switch (Inst.getOpcode()) {
  default: return false;
  case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
  case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
  case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
  case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
  case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
  case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
  case X86::OR16i16:  return convert16i16to16ri8(Inst, X86::OR16ri8);
  case X86::OR32i32:  return convert32i32to32ri8(Inst, X86::OR32ri8);
  case X86::OR64i32:  return convert64i32to64ri8(Inst, X86::OR64ri8);
  case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
  case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
  case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
  case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
  case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
  case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
  case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
  case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
  case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
  case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
  case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
  case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
  case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
  case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
  case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
  }
}

// Defined by GET_SUBTARGET_FEATURE_NAME in X86GenAsmMatcher.inc (included at
// the bottom of this file).
static const char *getSubtargetFeatureName(unsigned Val);
bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                        MCStreamer &Out, unsigned &ErrorInfo,
                        bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand *Op =
    static_cast<X86Operand*>(Operands[0]);
  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
  ArrayRef<SMRange> EmptyRanges = None;

  // First, handle aliases that expand to multiple instructions.
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  // These wait-prefixed FP mnemonics are emitted as an explicit WAIT followed
  // by the corresponding no-wait (fn*) mnemonic.
  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      Out.EmitInstruction(Inst);

    const char *Repl =
      StringSwitch<const char*>(Op->getToken())
        .Case("finit", "fninit")
        .Case("fsave", "fnsave")
        .Case("fstcw", "fnstcw")
        .Case("fstcww", "fnstcw")
        .Case("fstenv", "fnstenv")
        .Case("fstsw", "fnstsw")
        .Case("fstsww", "fnstsw")
        .Case("fclex", "fnclex")
        .Default(0);
    assert(Repl && "Unknown wait-prefixed instruction");
    delete Operands[0];
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }

  bool WasOriginallyInvalidOperand = false;
  MCInst Inst;

  // First, try a direct match.
  switch (MatchInstructionImpl(Operands, Inst,
                               ErrorInfo, MatchingInlineAsm,
                               isParsingIntelSyntax())) {
  default: break;
  case Match_Success:
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      Out.EmitInstruction(Inst);
    Opcode = Inst.getOpcode();
    return false;
  case Match_MissingFeature: {
    assert(ErrorInfo && "Unknown missing feature!");
    // Special case the error message for the very common case where only
    // a single subtarget feature is missing.
    // ErrorInfo holds a bitmask of required-but-absent features; walk every
    // bit and append the name of each missing feature.
    std::string Msg = "instruction requires:";
    unsigned Mask = 1;
    for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
      if (ErrorInfo & Mask) {
        Msg += " ";
        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
      }
      Mask <<= 1;
    }
    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
  }
  case Match_InvalidOperand:
    WasOriginallyInvalidOperand = true;
    break;
  case Match_MnemonicFail:
    break;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  // Tmp is "<mnemonic> " with the trailing byte overwritten below by each
  // candidate suffix character in turn.
  StringRef Base = Op->getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op->setTokenValue(Tmp.str());

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  // Note the FP table has only three real suffixes; the explicit '\0' pads it
  // so Suffixes[3] is defined for the fourth probe below.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

  // Check for the various suffix matches.
  Tmp[Base.size()] = Suffixes[0];
  unsigned ErrorInfoIgnore;
  unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
  unsigned Match1, Match2, Match3, Match4;

  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                MatchingInlineAsm, isParsingIntelSyntax());
  // If this returned as a missing feature failure, remember that.
  if (Match1 == Match_MissingFeature)
    ErrorInfoMissingFeature = ErrorInfoIgnore;
  Tmp[Base.size()] = Suffixes[1];
  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                MatchingInlineAsm, isParsingIntelSyntax());
  // If this returned as a missing feature failure, remember that.
  if (Match2 == Match_MissingFeature)
    ErrorInfoMissingFeature = ErrorInfoIgnore;
  Tmp[Base.size()] = Suffixes[2];
  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                MatchingInlineAsm, isParsingIntelSyntax());
  // If this returned as a missing feature failure, remember that.
  if (Match3 == Match_MissingFeature)
    ErrorInfoMissingFeature = ErrorInfoIgnore;
  Tmp[Base.size()] = Suffixes[3];
  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                MatchingInlineAsm, isParsingIntelSyntax());
  // If this returned as a missing feature failure, remember that.
  if (Match4 == Match_MissingFeature)
    ErrorInfoMissingFeature = ErrorInfoIgnore;

  // Restore the old token.
  Op->setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
    (Match1 == Match_Success) + (Match2 == Match_Success) +
    (Match3 == Match_Success) + (Match4 == Match_Success);
  if (NumSuccessfulMatches == 1) {
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      Out.EmitInstruction(Inst);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
    if (!WasOriginallyInvalidOperand) {
      ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
        Op->getLocRange();
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Ranges, MatchingInlineAsm);
    }

    // Recover location info for the operand if we know which was the problem.
    // ErrorInfo here is the operand index left by the original (unsuffixed)
    // match attempt; ~0U means "unknown operand".
    if (ErrorInfo != ~0U) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction",
                     EmptyRanges, MatchingInlineAsm);

      X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
      if (Operand->getStartLoc().isValid()) {
        SMRange OperandRange = Operand->getLocRange();
        return Error(Operand->getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
    std::string Msg = "instruction requires:";
    unsigned Mask = 1;
    for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
      if (ErrorInfoMissingFeature & Mask) {
        Msg += " ";
        Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
      }
      Mask <<= 1;
    }
    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
    Error(IDLoc, "invalid operand for instruction", EmptyRanges,
          MatchingInlineAsm);
    return true;
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRanges, MatchingInlineAsm);
  return true;
}


/// Top-level directive dispatch: handles .word, .code*, and the syntax
/// dialect toggles; returns true if the directive is not handled here.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal == ".word")
    return ParseDirectiveWord(2, DirectiveID.getLoc());
  else if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    // Dialect 0 is AT&T syntax.
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    // Dialect 1 is Intel syntax; the optional "noprefix" argument is
    // currently accepted but ignored.
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if(Parser.getTok().getString() == "noprefix") {
        // FIXME : Handle noprefix
        Parser.Lex();
      } else
        return true;
    }
    return false;
  }
  return true;
}

/// ParseDirectiveWord
///  ::= .word [ expression (, expression)* ]
/// Emits each comma-separated expression as a value of Size bytes.
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    for (;;) {
      const MCExpr *Value;
      if (getParser().parseExpression(Value))
        return true;

      getParser().getStreamer().EmitValue(Value, Size);

      if (getLexer().is(AsmToken::EndOfStatement))
        break;

      // FIXME: Improve diagnostic.
      if (getLexer().isNot(AsmToken::Comma))
        return Error(L, "unexpected token in directive");
      Parser.Lex();
    }
  }

  Parser.Lex(); // Consume the EndOfStatement.
  return false;
}

/// ParseDirectiveCode
///  ::= .code32 | .code64
/// Switches the parser's mode (and emits the matching assembler flag) only
/// when the requested mode differs from the current one.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  if (IDVal == ".code32") {
    Parser.Lex();
    if (is64BitMode()) {
      SwitchMode();
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode();
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
    }
  } else {
    return Error(L, "unexpected directive " + IDVal);
  }

  return false;
}

// Force static initialization.
// Registers this parser for both the 32-bit and 64-bit x86 targets.
extern "C" void LLVMInitializeX86AsmParser() {
  RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
  RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
}

// Pull in the tablegen-erated matcher tables and the helpers declared above
// (MatchInstructionImpl, MatchRegisterName, getSubtargetFeatureName).
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"