1/* ----------------------------------------------------------------------------- 2 * decode.c 3 * 4 * Copyright (c) 2005, 2006, Vivek Mohan <vivek@sig9.com> 5 * All rights reserved. See LICENSE 6 * ----------------------------------------------------------------------------- 7 */ 8 9#include <assert.h> 10#include <string.h> 11 12#include "types.h" 13#include "itab.h" 14#include "input.h" 15#include "decode.h" 16#include "extern.h" 17 18/* The max number of prefixes to an instruction */ 19#define MAX_PREFIXES 15 20 21static struct ud_itab_entry ie_invalid = { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none }; 22static struct ud_itab_entry ie_pause = { UD_Ipause, O_NONE, O_NONE, O_NONE, P_none }; 23static struct ud_itab_entry ie_nop = { UD_Inop, O_NONE, O_NONE, O_NONE, P_none }; 24 25 26/* Looks up mnemonic code in the mnemonic string table 27 * Returns NULL if the mnemonic code is invalid 28 */ 29const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) 30{ 31 if ( c < UD_Id3vil ) 32 return ud_mnemonics_str[ c ]; 33 return NULL; 34} 35 36 37/* Extracts instruction prefixes. 38 */ 39static int get_prefixes( struct ud* u ) 40{ 41 unsigned int have_pfx = 1; 42 unsigned int i; 43 uint8_t curr; 44 45 /* if in error state, bail out */ 46 if ( u->error ) 47 return -1; 48 49 /* keep going as long as there are prefixes available */ 50 for ( i = 0; have_pfx ; ++i ) { 51 52 /* Get next byte. */ 53 inp_next(u); 54 if ( u->error ) 55 return -1; 56 curr = inp_curr( u ); 57 58 /* rex prefixes in 64bit mode */ 59 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { 60 u->pfx_rex = curr; 61 } else { 62 switch ( curr ) 63 { 64 case 0x2E : 65 u->pfx_seg = UD_R_CS; 66 u->pfx_rex = 0; 67 break; 68 case 0x36 : 69 u->pfx_seg = UD_R_SS; 70 u->pfx_rex = 0; 71 break; 72 case 0x3E : 73 u->pfx_seg = UD_R_DS; 74 u->pfx_rex = 0; 75 break; 76 case 0x26 : 77 u->pfx_seg = UD_R_ES; 78 u->pfx_rex = 0; 79 break; 80 case 0x64 : 81 u->pfx_seg = UD_R_FS; 82 u->pfx_rex = 0; 83 break; 84 case 0x65 : 85 u->pfx_seg = UD_R_GS; 86 u->pfx_rex = 0; 87 break; 88 case 0x67 : /* adress-size override prefix */ 89 u->pfx_adr = 0x67; 90 u->pfx_rex = 0; 91 break; 92 case 0xF0 : 93 u->pfx_lock = 0xF0; 94 u->pfx_rex = 0; 95 break; 96 case 0x66: 97 /* the 0x66 sse prefix is only effective if no other sse prefix 98 * has already been specified. 99 */ 100 if ( !u->pfx_insn ) u->pfx_insn = 0x66; 101 u->pfx_opr = 0x66; 102 u->pfx_rex = 0; 103 break; 104 case 0xF2: 105 u->pfx_insn = 0xF2; 106 u->pfx_repne = 0xF2; 107 u->pfx_rex = 0; 108 break; 109 case 0xF3: 110 u->pfx_insn = 0xF3; 111 u->pfx_rep = 0xF3; 112 u->pfx_repe = 0xF3; 113 u->pfx_rex = 0; 114 break; 115 default : 116 /* No more prefixes */ 117 have_pfx = 0; 118 break; 119 } 120 } 121 122 /* check if we reached max instruction length */ 123 if ( i + 1 == MAX_INSN_LENGTH ) { 124 u->error = 1; 125 break; 126 } 127 } 128 129 /* return status */ 130 if ( u->error ) 131 return -1; 132 133 /* rewind back one byte in stream, since the above loop 134 * stops with a non-prefix byte. 135 */ 136 inp_back(u); 137 138 /* speculatively determine the effective operand mode, 139 * based on the prefixes and the current disassembly 140 * mode. This may be inaccurate, but useful for mode 141 * dependent decoding. 142 */ 143 if ( u->dis_mode == 64 ) { 144 u->opr_mode = REX_W( u->pfx_rex ) ? 64 : ( ( u->pfx_opr ) ? 16 : 32 ) ; 145 u->adr_mode = ( u->pfx_adr ) ? 32 : 64; 146 } else if ( u->dis_mode == 32 ) { 147 u->opr_mode = ( u->pfx_opr ) ? 16 : 32; 148 u->adr_mode = ( u->pfx_adr ) ? 16 : 32; 149 } else if ( u->dis_mode == 16 ) { 150 u->opr_mode = ( u->pfx_opr ) ? 32 : 16; 151 u->adr_mode = ( u->pfx_adr ) ? 32 : 16; 152 } 153 154 return 0; 155} 156 157 158/* Searches the instruction tables for the right entry. 159 */ 160static int search_itab( struct ud * u ) 161{ 162 struct ud_itab_entry * e = NULL; 163 enum ud_itab_index table; 164 uint8_t peek; 165 uint8_t did_peek = 0; 166 uint8_t curr; 167 uint8_t index; 168 169 /* if in state of error, return */ 170 if ( u->error ) 171 return -1; 172 173 /* get first byte of opcode. */ 174 inp_next(u); 175 if ( u->error ) 176 return -1; 177 curr = inp_curr(u); 178 179 /* resolve xchg, nop, pause crazyness */ 180 if ( 0x90 == curr ) { 181 if ( !( u->dis_mode == 64 && REX_B( u->pfx_rex ) ) ) { 182 if ( u->pfx_rep ) { 183 u->pfx_rep = 0; 184 e = & ie_pause; 185 } else { 186 e = & ie_nop; 187 } 188 goto found_entry; 189 } 190 } 191 192 /* get top-level table */ 193 if ( 0x0F == curr ) { 194 table = ITAB__0F; 195 curr = inp_next(u); 196 if ( u->error ) 197 return -1; 198 199 /* 2byte opcodes can be modified by 0x66, F3, and F2 prefixes */ 200 if ( 0x66 == u->pfx_insn ) { 201 if ( ud_itab_list[ ITAB__PFX_SSE66__0F ][ curr ].mnemonic != UD_Iinvalid ) { 202 table = ITAB__PFX_SSE66__0F; 203 u->pfx_opr = 0; 204 } 205 } else if ( 0xF2 == u->pfx_insn ) { 206 if ( ud_itab_list[ ITAB__PFX_SSEF2__0F ][ curr ].mnemonic != UD_Iinvalid ) { 207 table = ITAB__PFX_SSEF2__0F; 208 u->pfx_repne = 0; 209 } 210 } else if ( 0xF3 == u->pfx_insn ) { 211 if ( ud_itab_list[ ITAB__PFX_SSEF3__0F ][ curr ].mnemonic != UD_Iinvalid ) { 212 table = ITAB__PFX_SSEF3__0F; 213 u->pfx_repe = 0; 214 u->pfx_rep = 0; 215 } 216 } 217 /* pick an instruction from the 1byte table */ 218 } else { 219 table = ITAB__1BYTE; 220 } 221 222 index = curr; 223 224search: 225 226 e = & ud_itab_list[ table ][ index ]; 227 228 /* if mnemonic constant is a standard instruction constant 229 * our search is over. 230 */ 231 232 if ( e->mnemonic < UD_Id3vil ) { 233 if ( e->mnemonic == UD_Iinvalid ) { 234 if ( did_peek ) { 235 inp_next( u ); if ( u->error ) return -1; 236 } 237 goto found_entry; 238 } 239 goto found_entry; 240 } 241 242 table = e->prefix; 243 244 switch ( e->mnemonic ) 245 { 246 case UD_Igrp_reg: 247 peek = inp_peek( u ); 248 did_peek = 1; 249 index = MODRM_REG( peek ); 250 break; 251 252 case UD_Igrp_mod: 253 peek = inp_peek( u ); 254 did_peek = 1; 255 index = MODRM_MOD( peek ); 256 if ( index == 3 ) 257 index = ITAB__MOD_INDX__11; 258 else 259 index = ITAB__MOD_INDX__NOT_11; 260 break; 261 262 case UD_Igrp_rm: 263 curr = inp_next( u ); 264 did_peek = 0; 265 if ( u->error ) 266 return -1; 267 index = MODRM_RM( curr ); 268 break; 269 270 case UD_Igrp_x87: 271 curr = inp_next( u ); 272 did_peek = 0; 273 if ( u->error ) 274 return -1; 275 index = curr - 0xC0; 276 break; 277 278 case UD_Igrp_osize: 279 if ( u->opr_mode == 64 ) 280 index = ITAB__MODE_INDX__64; 281 else if ( u->opr_mode == 32 ) 282 index = ITAB__MODE_INDX__32; 283 else 284 index = ITAB__MODE_INDX__16; 285 break; 286 287 case UD_Igrp_asize: 288 if ( u->adr_mode == 64 ) 289 index = ITAB__MODE_INDX__64; 290 else if ( u->adr_mode == 32 ) 291 index = ITAB__MODE_INDX__32; 292 else 293 index = ITAB__MODE_INDX__16; 294 break; 295 296 case UD_Igrp_mode: 297 if ( u->dis_mode == 64 ) 298 index = ITAB__MODE_INDX__64; 299 else if ( u->dis_mode == 32 ) 300 index = ITAB__MODE_INDX__32; 301 else 302 index = ITAB__MODE_INDX__16; 303 break; 304 305 case UD_Igrp_vendor: 306 if ( u->vendor == UD_VENDOR_INTEL ) 307 index = ITAB__VENDOR_INDX__INTEL; 308 else if ( u->vendor == UD_VENDOR_AMD ) 309 index = ITAB__VENDOR_INDX__AMD; 310 else 311 assert( !"unrecognized vendor id" ); 312 break; 313 314 case UD_Id3vil: 315 assert( !"invalid instruction mnemonic constant Id3vil" ); 316 break; 317 318 default: 319 assert( !"invalid instruction mnemonic constant" ); 320 break; 321 } 322 323 goto search; 324 325found_entry: 326 327 u->itab_entry = e; 328 u->mnemonic = u->itab_entry->mnemonic; 329 330 return 0; 331} 332 333 334static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) 335{ 336 switch ( s ) 337 { 338 case SZ_V: 339 return ( u->opr_mode ); 340 case SZ_Z: 341 return ( u->opr_mode == 16 ) ? 16 : 32; 342 case SZ_P: 343 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; 344 case SZ_MDQ: 345 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; 346 case SZ_RDQ: 347 return ( u->dis_mode == 64 ) ? 64 : 32; 348 default: 349 return s; 350 } 351} 352 353 354static int resolve_mnemonic( struct ud* u ) 355{ 356 /* far/near flags */ 357 u->br_far = 0; 358 u->br_near = 0; 359 /* readjust operand sizes for call/jmp instrcutions */ 360 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { 361 /* WP: 16bit pointer */ 362 if ( u->operand[ 0 ].size == SZ_WP ) { 363 u->operand[ 0 ].size = 16; 364 u->br_far = 1; 365 u->br_near= 0; 366 /* DP: 32bit pointer */ 367 } else if ( u->operand[ 0 ].size == SZ_DP ) { 368 u->operand[ 0 ].size = 32; 369 u->br_far = 1; 370 u->br_near= 0; 371 } else { 372 u->br_far = 0; 373 u->br_near= 1; 374 } 375 /* resolve 3dnow weirdness. */ 376 } else if ( u->mnemonic == UD_I3dnow ) { 377 u->mnemonic = ud_itab_list[ ITAB__3DNOW ][ inp_curr( u ) ].mnemonic; 378 } 379 /* SWAPGS is only valid in 64bits mode */ 380 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { 381 u->error = 1; 382 return -1; 383 } 384 385 return 0; 386} 387 388 389/* ----------------------------------------------------------------------------- 390 * decode_a()- Decodes operands of the type seg:offset 391 * ----------------------------------------------------------------------------- 392 */ 393static void 394decode_a(struct ud* u, struct ud_operand *op) 395{ 396 if (u->opr_mode == 16) { 397 /* seg16:off16 */ 398 op->type = UD_OP_PTR; 399 op->size = 32; 400 op->lval.ptr.off = inp_uint16(u); 401 op->lval.ptr.seg = inp_uint16(u); 402 } else { 403 /* seg16:off32 */ 404 op->type = UD_OP_PTR; 405 op->size = 48; 406 op->lval.ptr.off = inp_uint32(u); 407 op->lval.ptr.seg = inp_uint16(u); 408 } 409} 410 411/* ----------------------------------------------------------------------------- 412 * decode_gpr() - Returns decoded General Purpose Register 413 * ----------------------------------------------------------------------------- 414 */ 415static enum ud_type 416decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) 417{ 418 s = resolve_operand_size(u, s); 419 420 switch (s) { 421 case 64: 422 return UD_R_RAX + rm; 423 case SZ_DP: 424 case 32: 425 return UD_R_EAX + rm; 426 case SZ_WP: 427 case 16: 428 return UD_R_AX + rm; 429 case 8: 430 if (u->dis_mode == 64 && u->pfx_rex) { 431 if (rm >= 4) 432 return UD_R_SPL + (rm-4); 433 return UD_R_AL + rm; 434 } else return UD_R_AL + rm; 435 default: 436 return 0; 437 } 438} 439 440/* ----------------------------------------------------------------------------- 441 * resolve_gpr64() - 64bit General Purpose Register-Selection. 442 * ----------------------------------------------------------------------------- 443 */ 444static enum ud_type 445resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op) 446{ 447 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) 448 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); 449 else gpr_op = (gpr_op - OP_rAX); 450 451 if (u->opr_mode == 16) 452 return gpr_op + UD_R_AX; 453 if (u->dis_mode == 32 || 454 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { 455 return gpr_op + UD_R_EAX; 456 } 457 458 return gpr_op + UD_R_RAX; 459} 460 461/* ----------------------------------------------------------------------------- 462 * resolve_gpr32 () - 32bit General Purpose Register-Selection. 463 * ----------------------------------------------------------------------------- 464 */ 465static enum ud_type 466resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) 467{ 468 gpr_op = gpr_op - OP_eAX; 469 470 if (u->opr_mode == 16) 471 return gpr_op + UD_R_AX; 472 473 return gpr_op + UD_R_EAX; 474} 475 476/* ----------------------------------------------------------------------------- 477 * resolve_reg() - Resolves the register type 478 * ----------------------------------------------------------------------------- 479 */ 480static enum ud_type 481resolve_reg(struct ud* u, unsigned int type, unsigned char i) 482{ 483 switch (type) { 484 case T_MMX : return UD_R_MM0 + (i & 7); 485 case T_XMM : return UD_R_XMM0 + i; 486 case T_CRG : return UD_R_CR0 + i; 487 case T_DBG : return UD_R_DR0 + i; 488 case T_SEG : return UD_R_ES + (i & 7); 489 case T_NONE: 490 default: return UD_NONE; 491 } 492} 493 494/* ----------------------------------------------------------------------------- 495 * decode_imm() - Decodes Immediate values. 496 * ----------------------------------------------------------------------------- 497 */ 498static void 499decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) 500{ 501 op->size = resolve_operand_size(u, s); 502 op->type = UD_OP_IMM; 503 504 switch (op->size) { 505 case 8: op->lval.sbyte = inp_uint8(u); break; 506 case 16: op->lval.uword = inp_uint16(u); break; 507 case 32: op->lval.udword = inp_uint32(u); break; 508 case 64: op->lval.uqword = inp_uint64(u); break; 509 default: return; 510 } 511} 512 513/* ----------------------------------------------------------------------------- 514 * decode_modrm() - Decodes ModRM Byte 515 * ----------------------------------------------------------------------------- 516 */ 517static void 518decode_modrm(struct ud* u, struct ud_operand *op, unsigned int s, 519 unsigned char rm_type, struct ud_operand *opreg, 520 unsigned char reg_size, unsigned char reg_type) 521{ 522 unsigned char mod, rm, reg; 523 524 inp_next(u); 525 526 /* get mod, r/m and reg fields */ 527 mod = MODRM_MOD(inp_curr(u)); 528 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(inp_curr(u)); 529 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(inp_curr(u)); 530 531 op->size = resolve_operand_size(u, s); 532 533 /* if mod is 11b, then the UD_R_m specifies a gpr/mmx/sse/control/debug */ 534 if (mod == 3) { 535 op->type = UD_OP_REG; 536 if (rm_type == T_GPR) 537 op->base = decode_gpr(u, op->size, rm); 538 else op->base = resolve_reg(u, rm_type, (REX_B(u->pfx_rex) << 3) | (rm&7)); 539 } 540 /* else its memory addressing */ 541 else { 542 op->type = UD_OP_MEM; 543 544 /* 64bit addressing */ 545 if (u->adr_mode == 64) { 546 547 op->base = UD_R_RAX + rm; 548 549 /* get offset type */ 550 if (mod == 1) 551 op->offset = 8; 552 else if (mod == 2) 553 op->offset = 32; 554 else if (mod == 0 && (rm & 7) == 5) { 555 op->base = UD_R_RIP; 556 op->offset = 32; 557 } else op->offset = 0; 558 559 /* Scale-Index-Base (SIB) */ 560 if ((rm & 7) == 4) { 561 inp_next(u); 562 563 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 564 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 565 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 566 567 /* special conditions for base reference */ 568 if (op->index == UD_R_RSP) { 569 op->index = UD_NONE; 570 op->scale = UD_NONE; 571 } 572 573 if (op->base == UD_R_RBP || op->base == UD_R_R13) { 574 if (mod == 0) 575 op->base = UD_NONE; 576 if (mod == 1) 577 op->offset = 8; 578 else op->offset = 32; 579 } 580 } 581 } 582 583 /* 32-Bit addressing mode */ 584 else if (u->adr_mode == 32) { 585 586 /* get base */ 587 op->base = UD_R_EAX + rm; 588 589 /* get offset type */ 590 if (mod == 1) 591 op->offset = 8; 592 else if (mod == 2) 593 op->offset = 32; 594 else if (mod == 0 && rm == 5) { 595 op->base = UD_NONE; 596 op->offset = 32; 597 } else op->offset = 0; 598 599 /* Scale-Index-Base (SIB) */ 600 if ((rm & 7) == 4) { 601 inp_next(u); 602 603 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 604 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 605 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 606 607 if (op->index == UD_R_ESP) { 608 op->index = UD_NONE; 609 op->scale = UD_NONE; 610 } 611 612 /* special condition for base reference */ 613 if (op->base == UD_R_EBP) { 614 if (mod == 0) 615 op->base = UD_NONE; 616 if (mod == 1) 617 op->offset = 8; 618 else op->offset = 32; 619 } 620 } 621 } 622 623 /* 16bit addressing mode */ 624 else { 625 switch (rm) { 626 case 0: op->base = UD_R_BX; op->index = UD_R_SI; break; 627 case 1: op->base = UD_R_BX; op->index = UD_R_DI; break; 628 case 2: op->base = UD_R_BP; op->index = UD_R_SI; break; 629 case 3: op->base = UD_R_BP; op->index = UD_R_DI; break; 630 case 4: op->base = UD_R_SI; break; 631 case 5: op->base = UD_R_DI; break; 632 case 6: op->base = UD_R_BP; break; 633 case 7: op->base = UD_R_BX; break; 634 } 635 636 if (mod == 0 && rm == 6) { 637 op->offset= 16; 638 op->base = UD_NONE; 639 } 640 else if (mod == 1) 641 op->offset = 8; 642 else if (mod == 2) 643 op->offset = 16; 644 } 645 } 646 647 /* extract offset, if any */ 648 switch(op->offset) { 649 case 8 : op->lval.ubyte = inp_uint8(u); break; 650 case 16: op->lval.uword = inp_uint16(u); break; 651 case 32: op->lval.udword = inp_uint32(u); break; 652 case 64: op->lval.uqword = inp_uint64(u); break; 653 default: break; 654 } 655 656 /* resolve register encoded in reg field */ 657 if (opreg) { 658 opreg->type = UD_OP_REG; 659 opreg->size = resolve_operand_size(u, reg_size); 660 if (reg_type == T_GPR) 661 opreg->base = decode_gpr(u, opreg->size, reg); 662 else opreg->base = resolve_reg(u, reg_type, reg); 663 } 664} 665 666/* ----------------------------------------------------------------------------- 667 * decode_o() - Decodes offset 668 * ----------------------------------------------------------------------------- 669 */ 670static void 671decode_o(struct ud* u, unsigned int s, struct ud_operand *op) 672{ 673 switch (u->adr_mode) { 674 case 64: 675 op->offset = 64; 676 op->lval.uqword = inp_uint64(u); 677 break; 678 case 32: 679 op->offset = 32; 680 op->lval.udword = inp_uint32(u); 681 break; 682 case 16: 683 op->offset = 16; 684 op->lval.uword = inp_uint16(u); 685 break; 686 default: 687 return; 688 } 689 op->type = UD_OP_MEM; 690 op->size = resolve_operand_size(u, s); 691} 692 693/* ----------------------------------------------------------------------------- 694 * disasm_operands() - Disassembles Operands. 695 * ----------------------------------------------------------------------------- 696 */ 697static int disasm_operands(register struct ud* u) 698{ 699 700 701 /* mopXt = map entry, operand X, type; */ 702 enum ud_operand_code mop1t = u->itab_entry->operand1.type; 703 enum ud_operand_code mop2t = u->itab_entry->operand2.type; 704 enum ud_operand_code mop3t = u->itab_entry->operand3.type; 705 706 /* mopXs = map entry, operand X, size */ 707 unsigned int mop1s = u->itab_entry->operand1.size; 708 unsigned int mop2s = u->itab_entry->operand2.size; 709 unsigned int mop3s = u->itab_entry->operand3.size; 710 711 /* iop = instruction operand */ 712 register struct ud_operand* iop = u->operand; 713 714 switch(mop1t) { 715 716 case OP_A : 717 decode_a(u, &(iop[0])); 718 break; 719 720 /* M[b] ... */ 721 case OP_M : 722 if (MODRM_MOD(inp_peek(u)) == 3) 723 u->error= 1; 724 /* E, G/P/V/I/CL/1/S */ 725 case OP_E : 726 if (mop2t == OP_G) { 727 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_GPR); 728 if (mop3t == OP_I) 729 decode_imm(u, mop3s, &(iop[2])); 730 else if (mop3t == OP_CL) { 731 iop[2].type = UD_OP_REG; 732 iop[2].base = UD_R_CL; 733 iop[2].size = 8; 734 } 735 } 736 else if (mop2t == OP_P) 737 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_MMX); 738 else if (mop2t == OP_V) 739 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_XMM); 740 else if (mop2t == OP_S) 741 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_SEG); 742 else { 743 decode_modrm(u, &(iop[0]), mop1s, T_GPR, NULL, 0, T_NONE); 744 if (mop2t == OP_CL) { 745 iop[1].type = UD_OP_REG; 746 iop[1].base = UD_R_CL; 747 iop[1].size = 8; 748 } else if (mop2t == OP_I1) { 749 iop[1].type = UD_OP_CONST; 750 u->operand[1].lval.udword = 1; 751 } else if (mop2t == OP_I) { 752 decode_imm(u, mop2s, &(iop[1])); 753 } 754 } 755 break; 756 757 /* G, E/PR[,I]/VR */ 758 case OP_G : 759 if (mop2t == OP_M) { 760 if (MODRM_MOD(inp_peek(u)) == 3) 761 u->error= 1; 762 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR); 763 } else if (mop2t == OP_E) { 764 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR); 765 if (mop3t == OP_I) 766 decode_imm(u, mop3s, &(iop[2])); 767 } else if (mop2t == OP_PR) { 768 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_GPR); 769 if (mop3t == OP_I) 770 decode_imm(u, mop3s, &(iop[2])); 771 } else if (mop2t == OP_VR) { 772 if (MODRM_MOD(inp_peek(u)) != 3) 773 u->error = 1; 774 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR); 775 } else if (mop2t == OP_W) 776 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR); 777 break; 778 779 /* AL..BH, I/O/DX */ 780 case OP_AL : case OP_CL : case OP_DL : case OP_BL : 781 case OP_AH : case OP_CH : case OP_DH : case OP_BH : 782 783 iop[0].type = UD_OP_REG; 784 iop[0].base = UD_R_AL + (mop1t - OP_AL); 785 iop[0].size = 8; 786 787 if (mop2t == OP_I) 788 decode_imm(u, mop2s, &(iop[1])); 789 else if (mop2t == OP_DX) { 790 iop[1].type = UD_OP_REG; 791 iop[1].base = UD_R_DX; 792 iop[1].size = 16; 793 } 794 else if (mop2t == OP_O) 795 decode_o(u, mop2s, &(iop[1])); 796 break; 797 798 /* rAX[r8]..rDI[r15], I/rAX..rDI/O */ 799 case OP_rAXr8 : case OP_rCXr9 : case OP_rDXr10 : case OP_rBXr11 : 800 case OP_rSPr12: case OP_rBPr13: case OP_rSIr14 : case OP_rDIr15 : 801 case OP_rAX : case OP_rCX : case OP_rDX : case OP_rBX : 802 case OP_rSP : case OP_rBP : case OP_rSI : case OP_rDI : 803 804 iop[0].type = UD_OP_REG; 805 iop[0].base = resolve_gpr64(u, mop1t); 806 807 if (mop2t == OP_I) 808 decode_imm(u, mop2s, &(iop[1])); 809 else if (mop2t >= OP_rAX && mop2t <= OP_rDI) { 810 iop[1].type = UD_OP_REG; 811 iop[1].base = resolve_gpr64(u, mop2t); 812 } 813 else if (mop2t == OP_O) { 814 decode_o(u, mop2s, &(iop[1])); 815 iop[0].size = resolve_operand_size(u, mop2s); 816 } 817 break; 818 819 /* AL[r8b]..BH[r15b], I */ 820 case OP_ALr8b : case OP_CLr9b : case OP_DLr10b : case OP_BLr11b : 821 case OP_AHr12b: case OP_CHr13b: case OP_DHr14b : case OP_BHr15b : 822 { 823 ud_type_t gpr = (mop1t - OP_ALr8b) + UD_R_AL + 824 (REX_B(u->pfx_rex) << 3); 825 if (UD_R_AH <= gpr && u->pfx_rex) 826 gpr = gpr + 4; 827 iop[0].type = UD_OP_REG; 828 iop[0].base = gpr; 829 if (mop2t == OP_I) 830 decode_imm(u, mop2s, &(iop[1])); 831 break; 832 } 833 834 /* eAX..eDX, DX/I */ 835 case OP_eAX : case OP_eCX : case OP_eDX : case OP_eBX : 836 case OP_eSP : case OP_eBP : case OP_eSI : case OP_eDI : 837 iop[0].type = UD_OP_REG; 838 iop[0].base = resolve_gpr32(u, mop1t); 839 if (mop2t == OP_DX) { 840 iop[1].type = UD_OP_REG; 841 iop[1].base = UD_R_DX; 842 iop[1].size = 16; 843 } else if (mop2t == OP_I) 844 decode_imm(u, mop2s, &(iop[1])); 845 break; 846 847 /* ES..GS */ 848 case OP_ES : case OP_CS : case OP_DS : 849 case OP_SS : case OP_FS : case OP_GS : 850 851 /* in 64bits mode, only fs and gs are allowed */ 852 if (u->dis_mode == 64) 853 if (mop1t != OP_FS && mop1t != OP_GS) 854 u->error= 1; 855 iop[0].type = UD_OP_REG; 856 iop[0].base = (mop1t - OP_ES) + UD_R_ES; 857 iop[0].size = 16; 858 859 break; 860 861 /* J */ 862 case OP_J : 863 decode_imm(u, mop1s, &(iop[0])); 864 iop[0].type = UD_OP_JIMM; 865 break ; 866 867 /* PR, I */ 868 case OP_PR: 869 if (MODRM_MOD(inp_peek(u)) != 3) 870 u->error = 1; 871 decode_modrm(u, &(iop[0]), mop1s, T_MMX, NULL, 0, T_NONE); 872 if (mop2t == OP_I) 873 decode_imm(u, mop2s, &(iop[1])); 874 break; 875 876 /* VR, I */ 877 case OP_VR: 878 if (MODRM_MOD(inp_peek(u)) != 3) 879 u->error = 1; 880 decode_modrm(u, &(iop[0]), mop1s, T_XMM, NULL, 0, T_NONE); 881 if (mop2t == OP_I) 882 decode_imm(u, mop2s, &(iop[1])); 883 break; 884 885 /* P, Q[,I]/W/E[,I],VR */ 886 case OP_P : 887 if (mop2t == OP_Q) { 888 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_MMX); 889 if (mop3t == OP_I) 890 decode_imm(u, mop3s, &(iop[2])); 891 } else if (mop2t == OP_W) { 892 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX); 893 } else if (mop2t == OP_VR) { 894 if (MODRM_MOD(inp_peek(u)) != 3) 895 u->error = 1; 896 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX); 897 } else if (mop2t == OP_E) { 898 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_MMX); 899 if (mop3t == OP_I) 900 decode_imm(u, mop3s, &(iop[2])); 901 } 902 break; 903 904 /* R, C/D */ 905 case OP_R : 906 if (mop2t == OP_C) 907 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_CRG); 908 else if (mop2t == OP_D) 909 decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_DBG); 910 break; 911 912 /* C, R */ 913 case OP_C : 914 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_CRG); 915 break; 916 917 /* D, R */ 918 case OP_D : 919 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_DBG); 920 break; 921 922 /* Q, P */ 923 case OP_Q : 924 decode_modrm(u, &(iop[0]), mop1s, T_MMX, &(iop[1]), mop2s, T_MMX); 925 break; 926 927 /* S, E */ 928 case OP_S : 929 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_SEG); 930 break; 931 932 /* W, V */ 933 case OP_W : 934 decode_modrm(u, &(iop[0]), mop1s, T_XMM, &(iop[1]), mop2s, T_XMM); 935 break; 936 937 /* V, W[,I]/Q/M/E */ 938 case OP_V : 939 if (mop2t == OP_W) { 940 /* special cases for movlps and movhps */ 941 if (MODRM_MOD(inp_peek(u)) == 3) { 942 if (u->mnemonic == UD_Imovlps) 943 u->mnemonic = UD_Imovhlps; 944 else 945 if (u->mnemonic == UD_Imovhps) 946 u->mnemonic = UD_Imovlhps; 947 } 948 decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_XMM); 949 if (mop3t == OP_I) 950 decode_imm(u, mop3s, &(iop[2])); 951 } else if (mop2t == OP_Q) 952 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM); 953 else if (mop2t == OP_M) { 954 if (MODRM_MOD(inp_peek(u)) == 3) 955 u->error= 1; 956 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM); 957 } else if (mop2t == OP_E) { 958 decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM); 959 } else if (mop2t == OP_PR) { 960 decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM); 961 } 962 break; 963 964 /* DX, eAX/AL */ 965 case OP_DX : 966 iop[0].type = UD_OP_REG; 967 iop[0].base = UD_R_DX; 968 iop[0].size = 16; 969 970 if (mop2t == OP_eAX) { 971 iop[1].type = UD_OP_REG; 972 iop[1].base = resolve_gpr32(u, mop2t); 973 } else if (mop2t == OP_AL) { 974 iop[1].type = UD_OP_REG; 975 iop[1].base = UD_R_AL; 976 iop[1].size = 8; 977 } 978 979 break; 980 981 /* I, I/AL/eAX */ 982 case OP_I : 983 decode_imm(u, mop1s, &(iop[0])); 984 if (mop2t == OP_I) 985 decode_imm(u, mop2s, &(iop[1])); 986 else if (mop2t == OP_AL) { 987 iop[1].type = UD_OP_REG; 988 iop[1].base = UD_R_AL; 989 iop[1].size = 16; 990 } else if (mop2t == OP_eAX) { 991 iop[1].type = UD_OP_REG; 992 iop[1].base = resolve_gpr32(u, mop2t); 993 } 994 break; 995 996 /* O, AL/eAX */ 997 case OP_O : 998 decode_o(u, mop1s, &(iop[0])); 999 iop[1].type = UD_OP_REG; 1000 iop[1].size = resolve_operand_size(u, mop1s); 1001 if (mop2t == OP_AL) 1002 iop[1].base = UD_R_AL; 1003 else if (mop2t == OP_eAX) 1004 iop[1].base = resolve_gpr32(u, mop2t); 1005 else if (mop2t == OP_rAX) 1006 iop[1].base = resolve_gpr64(u, mop2t); 1007 break; 1008 1009 /* 3 */ 1010 case OP_I3 : 1011 iop[0].type = UD_OP_CONST; 1012 iop[0].lval.sbyte = 3; 1013 break; 1014 1015 /* ST(n), ST(n) */ 1016 case OP_ST0 : case OP_ST1 : case OP_ST2 : case OP_ST3 : 1017 case OP_ST4 : case OP_ST5 : case OP_ST6 : case OP_ST7 : 1018 1019 iop[0].type = UD_OP_REG; 1020 iop[0].base = (mop1t-OP_ST0) + UD_R_ST0; 1021 iop[0].size = 0; 1022 1023 if (mop2t >= OP_ST0 && mop2t <= OP_ST7) { 1024 iop[1].type = UD_OP_REG; 1025 iop[1].base = (mop2t-OP_ST0) + UD_R_ST0; 1026 iop[1].size = 0; 1027 } 1028 break; 1029 1030 /* AX */ 1031 case OP_AX: 1032 iop[0].type = UD_OP_REG; 1033 iop[0].base = UD_R_AX; 1034 iop[0].size = 16; 1035 break; 1036 1037 /* none */ 1038 default : 1039 iop[0].type = iop[1].type = iop[2].type = UD_NONE; 1040 } 1041 1042 return 0; 1043} 1044 1045/* ----------------------------------------------------------------------------- 1046 * clear_insn() - clear instruction pointer 1047 * ----------------------------------------------------------------------------- 1048 */ 1049static int clear_insn(register struct ud* u) 1050{ 1051 u->error = 0; 1052 u->pfx_seg = 0; 1053 u->pfx_opr = 0; 1054 u->pfx_adr = 0; 1055 u->pfx_lock = 0; 1056 u->pfx_repne = 0; 1057 u->pfx_rep = 0; 1058 u->pfx_repe = 0; 1059 u->pfx_seg = 0; 1060 u->pfx_rex = 0; 1061 u->pfx_insn = 0; 1062 u->mnemonic = UD_Inone; 1063 u->itab_entry = NULL; 1064 1065 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); 1066 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); 1067 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); 1068 1069 return 0; 1070} 1071 1072static int do_mode( struct ud* u ) 1073{ 1074 /* if in error state, bail out */ 1075 if ( u->error ) return -1; 1076 1077 /* propagate perfix effects */ 1078 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ 1079 1080 /* Check validity of instruction m64 */ 1081 if ( P_INV64( u->itab_entry->prefix ) ) { 1082 u->error = 1; 1083 return -1; 1084 } 1085 1086 /* effective rex prefix is the effective mask for the 1087 * instruction hard-coded in the opcode map. 1088 */ 1089 u->pfx_rex = ( u->pfx_rex & 0x40 ) | 1090 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); 1091 1092 /* whether this instruction has a default operand size of 1093 * 64bit, also hardcoded into the opcode map. 1094 */ 1095 u->default64 = P_DEF64( u->itab_entry->prefix ); 1096 /* calculate effective operand size */ 1097 if ( REX_W( u->pfx_rex ) ) { 1098 u->opr_mode = 64; 1099 } else if ( u->pfx_opr ) { 1100 u->opr_mode = 16; 1101 } else { 1102 /* unless the default opr size of instruction is 64, 1103 * the effective operand size in the absence of rex.w 1104 * prefix is 32. 1105 */ 1106 u->opr_mode = ( u->default64 ) ? 64 : 32; 1107 } 1108 1109 /* calculate effective address size */ 1110 u->adr_mode = (u->pfx_adr) ? 32 : 64; 1111 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ 1112 u->opr_mode = ( u->pfx_opr ) ? 16 : 32; 1113 u->adr_mode = ( u->pfx_adr ) ? 16 : 32; 1114 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ 1115 u->opr_mode = ( u->pfx_opr ) ? 32 : 16; 1116 u->adr_mode = ( u->pfx_adr ) ? 32 : 16; 1117 } 1118 1119 /* These flags determine which operand to apply the operand size 1120 * cast to. 1121 */ 1122 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; 1123 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; 1124 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; 1125 1126 /* set flags for implicit addressing */ 1127 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); 1128 1129 return 0; 1130} 1131 1132static int gen_hex( struct ud *u ) 1133{ 1134 unsigned int i; 1135 unsigned char *src_ptr = inp_sess( u ); 1136 char* src_hex; 1137 1138 /* bail out if in error stat. */ 1139 if ( u->error ) return -1; 1140 /* output buffer pointe */ 1141 src_hex = ( char* ) u->insn_hexcode; 1142 /* for each byte used to decode instruction */ 1143 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { 1144 sprintf( src_hex, "%02x", *src_ptr & 0xFF ); 1145 src_hex += 2; 1146 } 1147 return 0; 1148} 1149 1150/* ============================================================================= 1151 * ud_decode() - Instruction decoder. Returns the number of bytes decoded. 1152 * ============================================================================= 1153 */ 1154unsigned int ud_decode( struct ud* u ) 1155{ 1156 inp_start(u); 1157 1158 if ( clear_insn( u ) ) { 1159 ; /* error */ 1160 } else if ( get_prefixes( u ) != 0 ) { 1161 ; /* error */ 1162 } else if ( search_itab( u ) != 0 ) { 1163 ; /* error */ 1164 } else if ( do_mode( u ) != 0 ) { 1165 ; /* error */ 1166 } else if ( disasm_operands( u ) != 0 ) { 1167 ; /* error */ 1168 } else if ( resolve_mnemonic( u ) != 0 ) { 1169 ; /* error */ 1170 } 1171 1172 /* Handle decode error. */ 1173 if ( u->error ) { 1174 /* clear out the decode data. */ 1175 clear_insn( u ); 1176 /* mark the sequence of bytes as invalid. */ 1177 u->itab_entry = & ie_invalid; 1178 u->mnemonic = u->itab_entry->mnemonic; 1179 } 1180 1181 u->insn_offset = u->pc; /* set offset of instruction */ 1182 u->insn_fill = 0; /* set translation buffer index to 0 */ 1183 u->pc += u->inp_ctr; /* move program counter by bytes decoded */ 1184 gen_hex( u ); /* generate hex code */ 1185 1186 /* return number of bytes disassembled. */ 1187 return u->inp_ctr; 1188} 1189 1190/* vim:cindent 1191 * vim:ts=4 1192 * vim:sw=4 1193 * vim:expandtab 1194 */ 1195