1/* udis86 - libudis86/decode.c 2 * 3 * Copyright (c) 2002-2009 Vivek Thampi 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, 7 * are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26#include "udint.h" 27#include "types.h" 28#include "decode.h" 29#include "extern.h" 30 31#ifndef __UD_STANDALONE__ 32# include <string.h> 33#endif /* __UD_STANDALONE__ */ 34 35/* The max number of prefixes to an instruction */ 36#define MAX_PREFIXES 15 37 38/* rex prefix bits */ 39#define REX_W(r) ( ( 0xF & ( r ) ) >> 3 ) 40#define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 ) 41#define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 ) 42#define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 ) 43#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \ 44 ( P_REXR(n) << 2 ) | \ 45 ( P_REXX(n) << 1 ) | \ 46 ( P_REXB(n) << 0 ) ) 47 48/* scable-index-base bits */ 49#define SIB_S(b) ( ( b ) >> 6 ) 50#define SIB_I(b) ( ( ( b ) >> 3 ) & 7 ) 51#define SIB_B(b) ( ( b ) & 7 ) 52 53/* modrm bits */ 54#define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 ) 55#define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 ) 56#define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 ) 57#define MODRM_RM(b) ( ( b ) & 7 ) 58 59static int decode_ext(struct ud *u, uint16_t ptr); 60static int decode_opcode(struct ud *u); 61 62enum reg_class { /* register classes */ 63 REGCLASS_GPR, 64 REGCLASS_MMX, 65 REGCLASS_CR, 66 REGCLASS_DB, 67 REGCLASS_SEG, 68 REGCLASS_XMM 69}; 70 71 /* 72 * inp_start 73 * Should be called before each de-code operation. 74 */ 75static void 76inp_start(struct ud *u) 77{ 78 u->inp_ctr = 0; 79} 80 81static uint8_t 82inp_peek(struct ud *u) 83{ 84 if (u->inp_end == 0) { 85 if (u->inp_buf != NULL) { 86 if (u->inp_buf_index < u->inp_buf_size) { 87 return u->inp_buf[u->inp_buf_index]; 88 } 89 } else if (u->inp_peek != UD_EOI) { 90 return u->inp_peek; 91 } else { 92 int c; 93 if ((c = u->inp_hook(u)) != UD_EOI) { 94 u->inp_peek = c; 95 return u->inp_peek; 96 } 97 } 98 } 99 u->inp_end = 1; 100 UDERR(u, "byte expected, eoi received\n"); 101 return 0; 102} 103 104static uint8_t 105inp_next(struct ud *u) 106{ 107 if (u->inp_end == 0) { 108 if (u->inp_buf != NULL) { 109 if (u->inp_buf_index < u->inp_buf_size) { 110 u->inp_ctr++; 111 return (u->inp_curr = u->inp_buf[u->inp_buf_index++]); 112 } 113 } else { 114 int c = u->inp_peek; 115 if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) { 116 u->inp_peek = UD_EOI; 117 u->inp_curr = c; 118 u->inp_sess[u->inp_ctr++] = u->inp_curr; 119 return u->inp_curr; 120 } 121 } 122 } 123 u->inp_end = 1; 124 UDERR(u, "byte expected, eoi received\n"); 125 return 0; 126} 127 128static uint8_t 129inp_curr(struct ud *u) 130{ 131 return u->inp_curr; 132} 133 134 135/* 136 * inp_uint8 137 * int_uint16 138 * int_uint32 139 * int_uint64 140 * Load little-endian values from input 141 */ 142static uint8_t 143inp_uint8(struct ud* u) 144{ 145 return inp_next(u); 146} 147 148static uint16_t 149inp_uint16(struct ud* u) 150{ 151 uint16_t r, ret; 152 153 ret = inp_next(u); 154 r = inp_next(u); 155 return ret | (r << 8); 156} 157 158static uint32_t 159inp_uint32(struct ud* u) 160{ 161 uint32_t r, ret; 162 163 ret = inp_next(u); 164 r = inp_next(u); 165 ret = ret | (r << 8); 166 r = inp_next(u); 167 ret = ret | (r << 16); 168 r = inp_next(u); 169 return ret | (r << 24); 170} 171 172static uint64_t 173inp_uint64(struct ud* u) 174{ 175 uint64_t r, ret; 176 177 ret = inp_next(u); 178 r = inp_next(u); 179 ret = ret | (r << 8); 180 r = inp_next(u); 181 ret = ret | (r << 16); 182 r = inp_next(u); 183 ret = ret | (r << 24); 184 r = inp_next(u); 185 ret = ret | (r << 32); 186 r = inp_next(u); 187 ret = ret | (r << 40); 188 r = inp_next(u); 189 ret = ret | (r << 48); 190 r = inp_next(u); 191 return ret | (r << 56); 192} 193 194 195static UD_INLINE int 196eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) 197{ 198 if (dis_mode == 64) { 199 return rex_w ? 64 : (pfx_opr ? 16 : 32); 200 } else if (dis_mode == 32) { 201 return pfx_opr ? 16 : 32; 202 } else { 203 UD_ASSERT(dis_mode == 16); 204 return pfx_opr ? 32 : 16; 205 } 206} 207 208 209static UD_INLINE int 210eff_adr_mode(int dis_mode, int pfx_adr) 211{ 212 if (dis_mode == 64) { 213 return pfx_adr ? 32 : 64; 214 } else if (dis_mode == 32) { 215 return pfx_adr ? 16 : 32; 216 } else { 217 UD_ASSERT(dis_mode == 16); 218 return pfx_adr ? 32 : 16; 219 } 220} 221 222 223/* 224 * decode_prefixes 225 * 226 * Extracts instruction prefixes. 227 */ 228static int 229decode_prefixes(struct ud *u) 230{ 231 int done = 0; 232 uint8_t curr = 0, last = 0; 233 UD_RETURN_ON_ERROR(u); 234 235 do { 236 last = curr; 237 curr = inp_next(u); 238 UD_RETURN_ON_ERROR(u); 239 if (u->inp_ctr == MAX_INSN_LENGTH) { 240 UD_RETURN_WITH_ERROR(u, "max instruction length"); 241 } 242 243 switch (curr) 244 { 245 case 0x2E: 246 u->pfx_seg = UD_R_CS; 247 break; 248 case 0x36: 249 u->pfx_seg = UD_R_SS; 250 break; 251 case 0x3E: 252 u->pfx_seg = UD_R_DS; 253 break; 254 case 0x26: 255 u->pfx_seg = UD_R_ES; 256 break; 257 case 0x64: 258 u->pfx_seg = UD_R_FS; 259 break; 260 case 0x65: 261 u->pfx_seg = UD_R_GS; 262 break; 263 case 0x67: /* adress-size override prefix */ 264 u->pfx_adr = 0x67; 265 break; 266 case 0xF0: 267 u->pfx_lock = 0xF0; 268 break; 269 case 0x66: 270 u->pfx_opr = 0x66; 271 break; 272 case 0xF2: 273 u->pfx_str = 0xf2; 274 break; 275 case 0xF3: 276 u->pfx_str = 0xf3; 277 break; 278 default: 279 /* consume if rex */ 280 done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1; 281 break; 282 } 283 } while (!done); 284 /* rex prefixes in 64bit mode, must be the last prefix */ 285 if (u->dis_mode == 64 && (last & 0xF0) == 0x40) { 286 u->pfx_rex = last; 287 } 288 return 0; 289} 290 291 292/* 293 * vex_l, vex_w 294 * Return the vex.L and vex.W bits 295 */ 296static UD_INLINE uint8_t 297vex_l(const struct ud *u) 298{ 299 UD_ASSERT(u->vex_op != 0); 300 return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1; 301} 302 303static UD_INLINE uint8_t 304vex_w(const struct ud *u) 305{ 306 UD_ASSERT(u->vex_op != 0); 307 return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0; 308} 309 310 311static UD_INLINE uint8_t 312modrm(struct ud * u) 313{ 314 if ( !u->have_modrm ) { 315 u->modrm = inp_next( u ); 316 u->modrm_offset = (uint8_t) (u->inp_ctr - 1); 317 u->have_modrm = 1; 318 } 319 return u->modrm; 320} 321 322 323static unsigned int 324resolve_operand_size(const struct ud* u, ud_operand_size_t osize) 325{ 326 switch (osize) { 327 case SZ_V: 328 return u->opr_mode; 329 case SZ_Z: 330 return u->opr_mode == 16 ? 16 : 32; 331 case SZ_Y: 332 return u->opr_mode == 16 ? 32 : u->opr_mode; 333 case SZ_RDQ: 334 return u->dis_mode == 64 ? 64 : 32; 335 case SZ_X: 336 UD_ASSERT(u->vex_op != 0); 337 return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ; 338 default: 339 return osize; 340 } 341} 342 343 344static int resolve_mnemonic( struct ud* u ) 345{ 346 /* resolve 3dnow weirdness. */ 347 if ( u->mnemonic == UD_I3dnow ) { 348 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic; 349 } 350 /* SWAPGS is only valid in 64bits mode */ 351 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { 352 UDERR(u, "swapgs invalid in 64bits mode\n"); 353 return -1; 354 } 355 356 if (u->mnemonic == UD_Ixchg) { 357 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && 358 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || 359 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && 360 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { 361 u->operand[0].type = UD_NONE; 362 u->operand[1].type = UD_NONE; 363 u->mnemonic = UD_Inop; 364 } 365 } 366 367 if (u->mnemonic == UD_Inop && u->pfx_repe) { 368 u->pfx_repe = 0; 369 u->mnemonic = UD_Ipause; 370 } 371 return 0; 372} 373 374 375/* ----------------------------------------------------------------------------- 376 * decode_a()- Decodes operands of the type seg:offset 377 * ----------------------------------------------------------------------------- 378 */ 379static void 380decode_a(struct ud* u, struct ud_operand *op) 381{ 382 if (u->opr_mode == 16) { 383 /* seg16:off16 */ 384 op->type = UD_OP_PTR; 385 op->size = 32; 386 op->lval.ptr.off = inp_uint16(u); 387 op->lval.ptr.seg = inp_uint16(u); 388 } else { 389 /* seg16:off32 */ 390 op->type = UD_OP_PTR; 391 op->size = 48; 392 op->lval.ptr.off = inp_uint32(u); 393 op->lval.ptr.seg = inp_uint16(u); 394 } 395} 396 397/* ----------------------------------------------------------------------------- 398 * decode_gpr() - Returns decoded General Purpose Register 399 * ----------------------------------------------------------------------------- 400 */ 401static enum ud_type 402decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) 403{ 404 switch (s) { 405 case 64: 406 return UD_R_RAX + rm; 407 case 32: 408 return UD_R_EAX + rm; 409 case 16: 410 return UD_R_AX + rm; 411 case 8: 412 if (u->dis_mode == 64 && u->pfx_rex) { 413 if (rm >= 4) 414 return UD_R_SPL + (rm-4); 415 return UD_R_AL + rm; 416 } else return UD_R_AL + rm; 417 case 0: 418 /* invalid size in case of a decode error */ 419 UD_ASSERT(u->error); 420 return UD_NONE; 421 default: 422 UD_ASSERT(!"invalid operand size"); 423 return UD_NONE; 424 } 425} 426 427static void 428decode_reg(struct ud *u, 429 struct ud_operand *opr, 430 int type, 431 int num, 432 int size) 433{ 434 int reg; 435 size = resolve_operand_size(u, size); 436 switch (type) { 437 case REGCLASS_GPR : reg = decode_gpr(u, size, num); break; 438 case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break; 439 case REGCLASS_XMM : 440 reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0); 441 break; 442 case REGCLASS_CR : reg = UD_R_CR0 + num; break; 443 case REGCLASS_DB : reg = UD_R_DR0 + num; break; 444 case REGCLASS_SEG : { 445 /* 446 * Only 6 segment registers, anything else is an error. 447 */ 448 if ((num & 7) > 5) { 449 UDERR(u, "invalid segment register value\n"); 450 return; 451 } else { 452 reg = UD_R_ES + (num & 7); 453 } 454 break; 455 } 456 default: 457 UD_ASSERT(!"invalid register type"); 458 return; 459 } 460 opr->type = UD_OP_REG; 461 opr->base = reg; 462 opr->size = size; 463} 464 465 466/* 467 * decode_imm 468 * 469 * Decode Immediate values. 470 */ 471static void 472decode_imm(struct ud* u, unsigned int size, struct ud_operand *op) 473{ 474 op->size = resolve_operand_size(u, size); 475 op->type = UD_OP_IMM; 476 477 switch (op->size) { 478 case 8: op->lval.sbyte = inp_uint8(u); break; 479 case 16: op->lval.uword = inp_uint16(u); break; 480 case 32: op->lval.udword = inp_uint32(u); break; 481 case 64: op->lval.uqword = inp_uint64(u); break; 482 default: return; 483 } 484} 485 486 487/* 488 * decode_mem_disp 489 * 490 * Decode mem address displacement. 491 */ 492static void 493decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op) 494{ 495 switch (size) { 496 case 8: 497 op->offset = 8; 498 op->lval.ubyte = inp_uint8(u); 499 break; 500 case 16: 501 op->offset = 16; 502 op->lval.uword = inp_uint16(u); 503 break; 504 case 32: 505 op->offset = 32; 506 op->lval.udword = inp_uint32(u); 507 break; 508 case 64: 509 op->offset = 64; 510 op->lval.uqword = inp_uint64(u); 511 break; 512 default: 513 return; 514 } 515} 516 517 518/* 519 * decode_modrm_reg 520 * 521 * Decodes reg field of mod/rm byte 522 * 523 */ 524static UD_INLINE void 525decode_modrm_reg(struct ud *u, 526 struct ud_operand *operand, 527 unsigned int type, 528 unsigned int size) 529{ 530 uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u)); 531 decode_reg(u, operand, type, reg, size); 532} 533 534 535/* 536 * decode_modrm_rm 537 * 538 * Decodes rm field of mod/rm byte 539 * 540 */ 541static void 542decode_modrm_rm(struct ud *u, 543 struct ud_operand *op, 544 unsigned char type, /* register type */ 545 unsigned int size) /* operand size */ 546 547{ 548 size_t offset = 0; 549 unsigned char mod, rm; 550 551 /* get mod, r/m and reg fields */ 552 mod = MODRM_MOD(modrm(u)); 553 rm = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u)); 554 555 /* 556 * If mod is 11b, then the modrm.rm specifies a register. 557 * 558 */ 559 if (mod == 3) { 560 decode_reg(u, op, type, rm, size); 561 return; 562 } 563 564 /* 565 * !11b => Memory Address 566 */ 567 op->type = UD_OP_MEM; 568 op->size = resolve_operand_size(u, size); 569 570 if (u->adr_mode == 64) { 571 op->base = UD_R_RAX + rm; 572 if (mod == 1) { 573 offset = 8; 574 } else if (mod == 2) { 575 offset = 32; 576 } else if (mod == 0 && (rm & 7) == 5) { 577 op->base = UD_R_RIP; 578 offset = 32; 579 } else { 580 offset = 0; 581 } 582 /* 583 * Scale-Index-Base (SIB) 584 */ 585 if ((rm & 7) == 4) { 586 inp_next(u); 587 588 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3)); 589 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3)); 590 /* special conditions for base reference */ 591 if (op->index == UD_R_RSP) { 592 op->index = UD_NONE; 593 op->scale = UD_NONE; 594 } else { 595 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 596 } 597 598 if (op->base == UD_R_RBP || op->base == UD_R_R13) { 599 if (mod == 0) { 600 op->base = UD_NONE; 601 } 602 if (mod == 1) { 603 offset = 8; 604 } else { 605 offset = 32; 606 } 607 } 608 } else { 609 op->scale = UD_NONE; 610 op->index = UD_NONE; 611 } 612 } else if (u->adr_mode == 32) { 613 op->base = UD_R_EAX + rm; 614 if (mod == 1) { 615 offset = 8; 616 } else if (mod == 2) { 617 offset = 32; 618 } else if (mod == 0 && rm == 5) { 619 op->base = UD_NONE; 620 offset = 32; 621 } else { 622 offset = 0; 623 } 624 625 /* Scale-Index-Base (SIB) */ 626 if ((rm & 7) == 4) { 627 inp_next(u); 628 629 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 630 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 631 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 632 633 if (op->index == UD_R_ESP) { 634 op->index = UD_NONE; 635 op->scale = UD_NONE; 636 } 637 638 /* special condition for base reference */ 639 if (op->base == UD_R_EBP) { 640 if (mod == 0) { 641 op->base = UD_NONE; 642 } 643 if (mod == 1) { 644 offset = 8; 645 } else { 646 offset = 32; 647 } 648 } 649 } else { 650 op->scale = UD_NONE; 651 op->index = UD_NONE; 652 } 653 } else { 654 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, 655 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; 656 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, 657 UD_NONE, UD_NONE, UD_NONE, UD_NONE }; 658 op->base = bases[rm & 7]; 659 op->index = indices[rm & 7]; 660 op->scale = UD_NONE; 661 if (mod == 0 && rm == 6) { 662 offset = 16; 663 op->base = UD_NONE; 664 } else if (mod == 1) { 665 offset = 8; 666 } else if (mod == 2) { 667 offset = 16; 668 } 669 } 670 671 if (offset) { 672 decode_mem_disp(u, offset, op); 673 } else { 674 op->offset = 0; 675 } 676} 677 678 679/* 680 * decode_moffset 681 * Decode offset-only memory operand 682 */ 683static void 684decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr) 685{ 686 opr->type = UD_OP_MEM; 687 opr->base = UD_NONE; 688 opr->index = UD_NONE; 689 opr->scale = UD_NONE; 690 opr->size = resolve_operand_size(u, size); 691 decode_mem_disp(u, u->adr_mode, opr); 692} 693 694 695static void 696decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size) 697{ 698 uint8_t vvvv; 699 UD_ASSERT(u->vex_op != 0); 700 vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf; 701 decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size); 702} 703 704 705/* 706 * decode_vex_immreg 707 * Decode source operand encoded in immediate byte [7:4] 708 */ 709static int 710decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size) 711{ 712 uint8_t imm = inp_next(u); 713 uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7; 714 UD_RETURN_ON_ERROR(u); 715 UD_ASSERT(u->vex_op != 0); 716 decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size); 717 return 0; 718} 719 720 721/* 722 * decode_operand 723 * 724 * Decodes a single operand. 725 * Returns the type of the operand (UD_NONE if none) 726 */ 727static int 728decode_operand(struct ud *u, 729 struct ud_operand *operand, 730 enum ud_operand_code type, 731 unsigned int size) 732{ 733 operand->type = UD_NONE; 734 operand->_oprcode = type; 735 736 switch (type) { 737 case OP_A : 738 decode_a(u, operand); 739 break; 740 case OP_MR: 741 decode_modrm_rm(u, operand, REGCLASS_GPR, 742 MODRM_MOD(modrm(u)) == 3 ? 743 Mx_reg_size(size) : Mx_mem_size(size)); 744 break; 745 case OP_F: 746 u->br_far = 1; 747 /* intended fall through */ 748 case OP_M: 749 if (MODRM_MOD(modrm(u)) == 3) { 750 UDERR(u, "expected modrm.mod != 3\n"); 751 } 752 /* intended fall through */ 753 case OP_E: 754 decode_modrm_rm(u, operand, REGCLASS_GPR, size); 755 break; 756 case OP_G: 757 decode_modrm_reg(u, operand, REGCLASS_GPR, size); 758 break; 759 case OP_sI: 760 case OP_I: 761 decode_imm(u, size, operand); 762 break; 763 case OP_I1: 764 operand->type = UD_OP_CONST; 765 operand->lval.udword = 1; 766 break; 767 case OP_N: 768 if (MODRM_MOD(modrm(u)) != 3) { 769 UDERR(u, "expected modrm.mod == 3\n"); 770 } 771 /* intended fall through */ 772 case OP_Q: 773 decode_modrm_rm(u, operand, REGCLASS_MMX, size); 774 break; 775 case OP_P: 776 decode_modrm_reg(u, operand, REGCLASS_MMX, size); 777 break; 778 case OP_U: 779 if (MODRM_MOD(modrm(u)) != 3) { 780 UDERR(u, "expected modrm.mod == 3\n"); 781 } 782 /* intended fall through */ 783 case OP_W: 784 decode_modrm_rm(u, operand, REGCLASS_XMM, size); 785 break; 786 case OP_V: 787 decode_modrm_reg(u, operand, REGCLASS_XMM, size); 788 break; 789 case OP_H: 790 decode_vex_vvvv(u, operand, size); 791 break; 792 case OP_MU: 793 decode_modrm_rm(u, operand, REGCLASS_XMM, 794 MODRM_MOD(modrm(u)) == 3 ? 795 Mx_reg_size(size) : Mx_mem_size(size)); 796 break; 797 case OP_S: 798 decode_modrm_reg(u, operand, REGCLASS_SEG, size); 799 break; 800 case OP_O: 801 decode_moffset(u, size, operand); 802 break; 803 case OP_R0: 804 case OP_R1: 805 case OP_R2: 806 case OP_R3: 807 case OP_R4: 808 case OP_R5: 809 case OP_R6: 810 case OP_R7: 811 decode_reg(u, operand, REGCLASS_GPR, 812 (REX_B(u->_rex) << 3) | (type - OP_R0), size); 813 break; 814 case OP_AL: 815 case OP_AX: 816 case OP_eAX: 817 case OP_rAX: 818 decode_reg(u, operand, REGCLASS_GPR, 0, size); 819 break; 820 case OP_CL: 821 case OP_CX: 822 case OP_eCX: 823 decode_reg(u, operand, REGCLASS_GPR, 1, size); 824 break; 825 case OP_DL: 826 case OP_DX: 827 case OP_eDX: 828 decode_reg(u, operand, REGCLASS_GPR, 2, size); 829 break; 830 case OP_ES: 831 case OP_CS: 832 case OP_DS: 833 case OP_SS: 834 case OP_FS: 835 case OP_GS: 836 /* in 64bits mode, only fs and gs are allowed */ 837 if (u->dis_mode == 64) { 838 if (type != OP_FS && type != OP_GS) { 839 UDERR(u, "invalid segment register in 64bits\n"); 840 } 841 } 842 operand->type = UD_OP_REG; 843 operand->base = (type - OP_ES) + UD_R_ES; 844 operand->size = 16; 845 break; 846 case OP_J : 847 decode_imm(u, size, operand); 848 operand->type = UD_OP_JIMM; 849 break ; 850 case OP_R : 851 if (MODRM_MOD(modrm(u)) != 3) { 852 UDERR(u, "expected modrm.mod == 3\n"); 853 } 854 decode_modrm_rm(u, operand, REGCLASS_GPR, size); 855 break; 856 case OP_C: 857 decode_modrm_reg(u, operand, REGCLASS_CR, size); 858 break; 859 case OP_D: 860 decode_modrm_reg(u, operand, REGCLASS_DB, size); 861 break; 862 case OP_I3 : 863 operand->type = UD_OP_CONST; 864 operand->lval.sbyte = 3; 865 break; 866 case OP_ST0: 867 case OP_ST1: 868 case OP_ST2: 869 case OP_ST3: 870 case OP_ST4: 871 case OP_ST5: 872 case OP_ST6: 873 case OP_ST7: 874 operand->type = UD_OP_REG; 875 operand->base = (type - OP_ST0) + UD_R_ST0; 876 operand->size = 80; 877 break; 878 case OP_L: 879 decode_vex_immreg(u, operand, size); 880 break; 881 default : 882 operand->type = UD_NONE; 883 break; 884 } 885 return operand->type; 886} 887 888 889/* 890 * decode_operands 891 * 892 * Disassemble upto 3 operands of the current instruction being 893 * disassembled. By the end of the function, the operand fields 894 * of the ud structure will have been filled. 895 */ 896static int 897decode_operands(struct ud* u) 898{ 899 decode_operand(u, &u->operand[0], 900 u->itab_entry->operand1.type, 901 u->itab_entry->operand1.size); 902 if (u->operand[0].type != UD_NONE) { 903 decode_operand(u, &u->operand[1], 904 u->itab_entry->operand2.type, 905 u->itab_entry->operand2.size); 906 } 907 if (u->operand[1].type != UD_NONE) { 908 decode_operand(u, &u->operand[2], 909 u->itab_entry->operand3.type, 910 u->itab_entry->operand3.size); 911 } 912 if (u->operand[2].type != UD_NONE) { 913 decode_operand(u, &u->operand[3], 914 u->itab_entry->operand4.type, 915 u->itab_entry->operand4.size); 916 } 917 return 0; 918} 919 920/* ----------------------------------------------------------------------------- 921 * clear_insn() - clear instruction structure 922 * ----------------------------------------------------------------------------- 923 */ 924static void 925clear_insn(register struct ud* u) 926{ 927 u->error = 0; 928 u->pfx_seg = 0; 929 u->pfx_opr = 0; 930 u->pfx_adr = 0; 931 u->pfx_lock = 0; 932 u->pfx_repne = 0; 933 u->pfx_rep = 0; 934 u->pfx_repe = 0; 935 u->pfx_rex = 0; 936 u->pfx_str = 0; 937 u->mnemonic = UD_Inone; 938 u->itab_entry = NULL; 939 u->have_modrm = 0; 940 u->br_far = 0; 941 u->vex_op = 0; 942 u->_rex = 0; 943 u->operand[0].type = UD_NONE; 944 u->operand[1].type = UD_NONE; 945 u->operand[2].type = UD_NONE; 946 u->operand[3].type = UD_NONE; 947} 948 949 950static UD_INLINE int 951resolve_pfx_str(struct ud* u) 952{ 953 if (u->pfx_str == 0xf3) { 954 if (P_STR(u->itab_entry->prefix)) { 955 u->pfx_rep = 0xf3; 956 } else { 957 u->pfx_repe = 0xf3; 958 } 959 } else if (u->pfx_str == 0xf2) { 960 u->pfx_repne = 0xf3; 961 } 962 return 0; 963} 964 965 966static int 967resolve_mode( struct ud* u ) 968{ 969 int default64; 970 /* if in error state, bail out */ 971 if ( u->error ) return -1; 972 973 /* propagate prefix effects */ 974 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ 975 976 /* Check validity of instruction m64 */ 977 if ( P_INV64( u->itab_entry->prefix ) ) { 978 UDERR(u, "instruction invalid in 64bits\n"); 979 return -1; 980 } 981 982 /* compute effective rex based on, 983 * - vex prefix (if any) 984 * - rex prefix (if any, and not vex) 985 * - allowed prefixes specified by the opcode map 986 */ 987 if (u->vex_op == 0xc4) { 988 /* vex has rex.rxb in 1's complement */ 989 u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ | 990 ((u->vex_b2 >> 4) & 0x8) /* rex.w000 */); 991 } else if (u->vex_op == 0xc5) { 992 /* vex has rex.r in 1's complement */ 993 u->_rex = (~(u->vex_b1 >> 5)) & 4; 994 } else { 995 UD_ASSERT(u->vex_op == 0); 996 u->_rex = u->pfx_rex; 997 } 998 u->_rex &= REX_PFX_MASK(u->itab_entry->prefix); 999 1000 /* whether this instruction has a default operand size of 1001 * 64bit, also hardcoded into the opcode map. 1002 */ 1003 default64 = P_DEF64( u->itab_entry->prefix ); 1004 /* calculate effective operand size */ 1005 if (REX_W(u->_rex)) { 1006 u->opr_mode = 64; 1007 } else if ( u->pfx_opr ) { 1008 u->opr_mode = 16; 1009 } else { 1010 /* unless the default opr size of instruction is 64, 1011 * the effective operand size in the absence of rex.w 1012 * prefix is 32. 1013 */ 1014 u->opr_mode = default64 ? 64 : 32; 1015 } 1016 1017 /* calculate effective address size */ 1018 u->adr_mode = (u->pfx_adr) ? 32 : 64; 1019 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ 1020 u->opr_mode = ( u->pfx_opr ) ? 16 : 32; 1021 u->adr_mode = ( u->pfx_adr ) ? 16 : 32; 1022 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ 1023 u->opr_mode = ( u->pfx_opr ) ? 32 : 16; 1024 u->adr_mode = ( u->pfx_adr ) ? 32 : 16; 1025 } 1026 1027 return 0; 1028} 1029 1030 1031static UD_INLINE int 1032decode_insn(struct ud *u, uint16_t ptr) 1033{ 1034 UD_ASSERT((ptr & 0x8000) == 0); 1035 u->itab_entry = &ud_itab[ ptr ]; 1036 u->mnemonic = u->itab_entry->mnemonic; 1037 return (resolve_pfx_str(u) == 0 && 1038 resolve_mode(u) == 0 && 1039 decode_operands(u) == 0 && 1040 resolve_mnemonic(u) == 0) ? 0 : -1; 1041} 1042 1043 1044/* 1045 * decode_3dnow() 1046 * 1047 * Decoding 3dnow is a little tricky because of its strange opcode 1048 * structure. The final opcode disambiguation depends on the last 1049 * byte that comes after the operands have been decoded. Fortunately, 1050 * all 3dnow instructions have the same set of operand types. So we 1051 * go ahead and decode the instruction by picking an arbitrarily chosen 1052 * valid entry in the table, decode the operands, and read the final 1053 * byte to resolve the menmonic. 1054 */ 1055static UD_INLINE int 1056decode_3dnow(struct ud* u) 1057{ 1058 uint16_t ptr; 1059 UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW); 1060 UD_ASSERT(u->le->table[0xc] != 0); 1061 decode_insn(u, u->le->table[0xc]); 1062 inp_next(u); 1063 if (u->error) { 1064 return -1; 1065 } 1066 ptr = u->le->table[inp_curr(u)]; 1067 UD_ASSERT((ptr & 0x8000) == 0); 1068 u->mnemonic = ud_itab[ptr].mnemonic; 1069 return 0; 1070} 1071 1072 1073static int 1074decode_ssepfx(struct ud *u) 1075{ 1076 uint8_t idx; 1077 uint8_t pfx; 1078 1079 /* 1080 * String prefixes (f2, f3) take precedence over operand 1081 * size prefix (66). 1082 */ 1083 pfx = u->pfx_str; 1084 if (pfx == 0) { 1085 pfx = u->pfx_opr; 1086 } 1087 idx = ((pfx & 0xf) + 1) / 2; 1088 if (u->le->table[idx] == 0) { 1089 idx = 0; 1090 } 1091 if (idx && u->le->table[idx] != 0) { 1092 /* 1093 * "Consume" the prefix as a part of the opcode, so it is no 1094 * longer exported as an instruction prefix. 1095 */ 1096 u->pfx_str = 0; 1097 if (pfx == 0x66) { 1098 /* 1099 * consume "66" only if it was used for decoding, leaving 1100 * it to be used as an operands size override for some 1101 * simd instructions. 1102 */ 1103 u->pfx_opr = 0; 1104 } 1105 } 1106 return decode_ext(u, u->le->table[idx]); 1107} 1108 1109 1110static int 1111decode_vex(struct ud *u) 1112{ 1113 uint8_t index; 1114 if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) { 1115 index = 0; 1116 } else { 1117 u->vex_op = inp_curr(u); 1118 u->vex_b1 = inp_next(u); 1119 if (u->vex_op == 0xc4) { 1120 uint8_t pp, m; 1121 /* 3-byte vex */ 1122 u->vex_b2 = inp_next(u); 1123 UD_RETURN_ON_ERROR(u); 1124 m = u->vex_b1 & 0x1f; 1125 if (m == 0 || m > 3) { 1126 UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value"); 1127 } 1128 pp = u->vex_b2 & 0x3; 1129 index = (pp << 2) | m; 1130 } else { 1131 /* 2-byte vex */ 1132 UD_ASSERT(u->vex_op == 0xc5); 1133 index = 0x1 | ((u->vex_b1 & 0x3) << 2); 1134 } 1135 } 1136 return decode_ext(u, u->le->table[index]); 1137} 1138 1139 1140/* 1141 * decode_ext() 1142 * 1143 * Decode opcode extensions (if any) 1144 */ 1145static int 1146decode_ext(struct ud *u, uint16_t ptr) 1147{ 1148 uint8_t idx = 0; 1149 if ((ptr & 0x8000) == 0) { 1150 return decode_insn(u, ptr); 1151 } 1152 u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; 1153 if (u->le->type == UD_TAB__OPC_3DNOW) { 1154 return decode_3dnow(u); 1155 } 1156 1157 switch (u->le->type) { 1158 case UD_TAB__OPC_MOD: 1159 /* !11 = 0, 11 = 1 */ 1160 idx = (MODRM_MOD(modrm(u)) + 1) / 4; 1161 break; 1162 /* disassembly mode/operand size/address size based tables. 1163 * 16 = 0,, 32 = 1, 64 = 2 1164 */ 1165 case UD_TAB__OPC_MODE: 1166 idx = u->dis_mode != 64 ? 0 : 1; 1167 break; 1168 case UD_TAB__OPC_OSIZE: 1169 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; 1170 break; 1171 case UD_TAB__OPC_ASIZE: 1172 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; 1173 break; 1174 case UD_TAB__OPC_X87: 1175 idx = modrm(u) - 0xC0; 1176 break; 1177 case UD_TAB__OPC_VENDOR: 1178 if (u->vendor == UD_VENDOR_ANY) { 1179 /* choose a valid entry */ 1180 idx = (u->le->table[idx] != 0) ? 0 : 1; 1181 } else if (u->vendor == UD_VENDOR_AMD) { 1182 idx = 0; 1183 } else { 1184 idx = 1; 1185 } 1186 break; 1187 case UD_TAB__OPC_RM: 1188 idx = MODRM_RM(modrm(u)); 1189 break; 1190 case UD_TAB__OPC_REG: 1191 idx = MODRM_REG(modrm(u)); 1192 break; 1193 case UD_TAB__OPC_SSE: 1194 return decode_ssepfx(u); 1195 case UD_TAB__OPC_VEX: 1196 return decode_vex(u); 1197 case UD_TAB__OPC_VEX_W: 1198 idx = vex_w(u); 1199 break; 1200 case UD_TAB__OPC_VEX_L: 1201 idx = vex_l(u); 1202 break; 1203 case UD_TAB__OPC_TABLE: 1204 inp_next(u); 1205 return decode_opcode(u); 1206 default: 1207 UD_ASSERT(!"not reached"); 1208 break; 1209 } 1210 1211 return decode_ext(u, u->le->table[idx]); 1212} 1213 1214 1215static int 1216decode_opcode(struct ud *u) 1217{ 1218 uint16_t ptr; 1219 UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE); 1220 UD_RETURN_ON_ERROR(u); 1221 ptr = u->le->table[inp_curr(u)]; 1222 return decode_ext(u, ptr); 1223} 1224 1225 1226/* ============================================================================= 1227 * ud_decode() - Instruction decoder. Returns the number of bytes decoded. 1228 * ============================================================================= 1229 */ 1230unsigned int 1231ud_decode(struct ud *u) 1232{ 1233 inp_start(u); 1234 clear_insn(u); 1235 u->le = &ud_lookup_table_list[0]; 1236 u->error = decode_prefixes(u) == -1 || 1237 decode_opcode(u) == -1 || 1238 u->error; 1239 /* Handle decode error. */ 1240 if (u->error) { 1241 /* clear out the decode data. */ 1242 clear_insn(u); 1243 /* mark the sequence of bytes as invalid. */ 1244 u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */ 1245 u->mnemonic = u->itab_entry->mnemonic; 1246 } 1247 1248 /* maybe this stray segment override byte 1249 * should be spewed out? 1250 */ 1251 if ( !P_SEG( u->itab_entry->prefix ) && 1252 u->operand[0].type != UD_OP_MEM && 1253 u->operand[1].type != UD_OP_MEM ) 1254 u->pfx_seg = 0; 1255 1256 u->insn_offset = u->pc; /* set offset of instruction */ 1257 u->asm_buf_fill = 0; /* set translation buffer index to 0 */ 1258 u->pc += u->inp_ctr; /* move program counter by bytes decoded */ 1259 1260 /* return number of bytes disassembled. */ 1261 return u->inp_ctr; 1262} 1263 1264/* 1265vim: set ts=2 sw=2 expandtab 1266*/ 1267