1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 *     * Redistributions of source code must retain the above copyright notice,
10 *       this list of conditions and the following disclaimer.
11 *     * Redistributions in binary form must reproduce the above copyright notice,
12 *       this list of conditions and the following disclaimer in the documentation
13 *       and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "udint.h"
27#include "types.h"
28#include "decode.h"
29#include "extern.h"
30
31#ifndef __UD_STANDALONE__
32# include <string.h>
33#endif /* __UD_STANDALONE__ */
34
35/* The max number of prefixes to an instruction */
36#define MAX_PREFIXES    15
37
38/* rex prefix bits */
39#define REX_W(r)        ( ( 0xF & ( r ) )  >> 3 )
40#define REX_R(r)        ( ( 0x7 & ( r ) )  >> 2 )
41#define REX_X(r)        ( ( 0x3 & ( r ) )  >> 1 )
42#define REX_B(r)        ( ( 0x1 & ( r ) )  >> 0 )
43#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
44                          ( P_REXR(n) << 2 ) | \
45                          ( P_REXX(n) << 1 ) | \
46                          ( P_REXB(n) << 0 ) )
47
/* scale-index-base bits */
49#define SIB_S(b)        ( ( b ) >> 6 )
50#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
51#define SIB_B(b)        ( ( b ) & 7 )
52
53/* modrm bits */
54#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
55#define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
56#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
57#define MODRM_RM(b)     ( ( b ) & 7 )
58
59static int decode_ext(struct ud *u, uint16_t ptr);
60static int decode_opcode(struct ud *u);
61
enum reg_class { /* register classes */
  REGCLASS_GPR,   /* general purpose (rax/eax/ax/al banks) */
  REGCLASS_MMX,   /* mmx (mm0..mm7) */
  REGCLASS_CR,    /* control registers (cr0..) */
  REGCLASS_DB,    /* debug registers (dr0..) */
  REGCLASS_SEG,   /* segment registers (es..gs) */
  REGCLASS_XMM    /* sse/avx registers (xmm0../ymm0..) */
};
70
71 /*
72 * inp_start
73 *    Should be called before each de-code operation.
74 */
static void
inp_start(struct ud *u)
{
  /* reset the per-instruction consumed-byte counter */
  u->inp_ctr = 0;
}
80
static uint8_t
inp_peek(struct ud *u)
{
  /*
   * Return the next input byte without consuming it. Sources, in
   * order of preference: the user-supplied buffer (inp_buf), a byte
   * cached by an earlier peek (inp_peek), or the input hook callback.
   * On end-of-input, latch inp_end, flag a decode error and return 0.
   */
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        return u->inp_buf[u->inp_buf_index];
      }
    } else if (u->inp_peek != UD_EOI) {
      /* a byte fetched by an earlier peek is still cached */
      return u->inp_peek;
    } else {
      int c;
      if ((c = u->inp_hook(u)) != UD_EOI) {
        /* cache the byte so a subsequent inp_next can consume it */
        u->inp_peek = c;
        return u->inp_peek;
      }
    }
  }
  /* latch end-of-input so further calls fail fast */
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
103
static uint8_t
inp_next(struct ud *u)
{
  /*
   * Consume and return the next input byte, from the user-supplied
   * buffer or the input hook (honoring a byte previously cached by
   * inp_peek). On end-of-input, latch inp_end, flag a decode error
   * and return 0.
   */
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        u->inp_ctr++;
        return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
      }
    } else {
      int c = u->inp_peek;
      /* use the peeked byte if cached, otherwise pull from the hook */
      if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) {
        u->inp_peek = UD_EOI;
        u->inp_curr = c;
        /* NOTE(review): inp_sess appears to record hook-sourced bytes
         * for later access; the buffer path does not write it — confirm */
        u->inp_sess[u->inp_ctr++] = u->inp_curr;
        return u->inp_curr;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}
127
static uint8_t
inp_curr(struct ud *u)
{
  /* return the most recently consumed input byte */
  return u->inp_curr;
}
133
134
135/*
136 * inp_uint8
137 * int_uint16
138 * int_uint32
139 * int_uint64
140 *    Load little-endian values from input
141 */
static uint8_t
inp_uint8(struct ud* u)
{
  /* consume a single byte */
  return inp_next(u);
}
147
148static uint16_t
149inp_uint16(struct ud* u)
150{
151  uint16_t r, ret;
152
153  ret = inp_next(u);
154  r = inp_next(u);
155  return ret | (r << 8);
156}
157
158static uint32_t
159inp_uint32(struct ud* u)
160{
161  uint32_t r, ret;
162
163  ret = inp_next(u);
164  r = inp_next(u);
165  ret = ret | (r << 8);
166  r = inp_next(u);
167  ret = ret | (r << 16);
168  r = inp_next(u);
169  return ret | (r << 24);
170}
171
172static uint64_t
173inp_uint64(struct ud* u)
174{
175  uint64_t r, ret;
176
177  ret = inp_next(u);
178  r = inp_next(u);
179  ret = ret | (r << 8);
180  r = inp_next(u);
181  ret = ret | (r << 16);
182  r = inp_next(u);
183  ret = ret | (r << 24);
184  r = inp_next(u);
185  ret = ret | (r << 32);
186  r = inp_next(u);
187  ret = ret | (r << 40);
188  r = inp_next(u);
189  ret = ret | (r << 48);
190  r = inp_next(u);
191  return ret | (r << 56);
192}
193
194
195static UD_INLINE int
196eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
197{
198  if (dis_mode == 64) {
199    return rex_w ? 64 : (pfx_opr ? 16 : 32);
200  } else if (dis_mode == 32) {
201    return pfx_opr ? 16 : 32;
202  } else {
203    UD_ASSERT(dis_mode == 16);
204    return pfx_opr ? 32 : 16;
205  }
206}
207
208
209static UD_INLINE int
210eff_adr_mode(int dis_mode, int pfx_adr)
211{
212  if (dis_mode == 64) {
213    return pfx_adr ? 32 : 64;
214  } else if (dis_mode == 32) {
215    return pfx_adr ? 16 : 32;
216  } else {
217    UD_ASSERT(dis_mode == 16);
218    return pfx_adr ? 32 : 16;
219  }
220}
221
222
223/*
224 * decode_prefixes
225 *
226 *  Extracts instruction prefixes.
227 */
static int
decode_prefixes(struct ud *u)
{
  /*
   * Consume all prefix bytes preceding the opcode, recording their
   * effects in the ud object. Stops at (and leaves current) the first
   * non-prefix byte, for opcode dispatch.
   */
  int done = 0;
  uint8_t curr = 0, last = 0;
  UD_RETURN_ON_ERROR(u);

  do {
    last = curr;
    curr = inp_next(u);
    UD_RETURN_ON_ERROR(u);
    /* reject byte streams exceeding the maximum instruction length */
    if (u->inp_ctr == MAX_INSN_LENGTH) {
      UD_RETURN_WITH_ERROR(u, "max instruction length");
    }

    switch (curr)
    {
    case 0x2E:
      u->pfx_seg = UD_R_CS;
      break;
    case 0x36:
      u->pfx_seg = UD_R_SS;
      break;
    case 0x3E:
      u->pfx_seg = UD_R_DS;
      break;
    case 0x26:
      u->pfx_seg = UD_R_ES;
      break;
    case 0x64:
      u->pfx_seg = UD_R_FS;
      break;
    case 0x65:
      u->pfx_seg = UD_R_GS;
      break;
    case 0x67: /* address-size override prefix */
      u->pfx_adr = 0x67;
      break;
    case 0xF0:
      u->pfx_lock = 0xF0;
      break;
    case 0x66:
      u->pfx_opr = 0x66;
      break;
    case 0xF2:
      u->pfx_str = 0xf2;
      break;
    case 0xF3:
      u->pfx_str = 0xf3;
      break;
    default:
      /* consume if rex */
      done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
      break;
    }
  } while (!done);
  /* a rex prefix in 64bit mode is only effective as the last prefix */
  if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
    u->pfx_rex = last;
  }
  return 0;
}
290
291
292/*
293 * vex_l, vex_w
294 *  Return the vex.L and vex.W bits
295 */
296static UD_INLINE uint8_t
297vex_l(const struct ud *u)
298{
299  UD_ASSERT(u->vex_op != 0);
300  return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1;
301}
302
303static UD_INLINE uint8_t
304vex_w(const struct ud *u)
305{
306  UD_ASSERT(u->vex_op != 0);
307  return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0;
308}
309
310
311static UD_INLINE uint8_t
312modrm(struct ud * u)
313{
314    if ( !u->have_modrm ) {
315        u->modrm = inp_next( u );
316        u->modrm_offset = (uint8_t) (u->inp_ctr - 1);
317        u->have_modrm = 1;
318    }
319    return u->modrm;
320}
321
322
323static unsigned int
324resolve_operand_size(const struct ud* u, ud_operand_size_t osize)
325{
326  switch (osize) {
327  case SZ_V:
328    return u->opr_mode;
329  case SZ_Z:
330    return u->opr_mode == 16 ? 16 : 32;
331  case SZ_Y:
332    return u->opr_mode == 16 ? 32 : u->opr_mode;
333  case SZ_RDQ:
334    return u->dis_mode == 64 ? 64 : 32;
335  case SZ_X:
336    UD_ASSERT(u->vex_op != 0);
337    return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ?  SZ_QQ : SZ_DQ;
338  default:
339    return osize;
340  }
341}
342
343
344static int resolve_mnemonic( struct ud* u )
345{
346  /* resolve 3dnow weirdness. */
347  if ( u->mnemonic == UD_I3dnow ) {
348    u->mnemonic = ud_itab[ u->le->table[ inp_curr( u )  ] ].mnemonic;
349  }
350  /* SWAPGS is only valid in 64bits mode */
351  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
352    UDERR(u, "swapgs invalid in 64bits mode\n");
353    return -1;
354  }
355
356  if (u->mnemonic == UD_Ixchg) {
357    if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
358         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
359        (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
360         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
361      u->operand[0].type = UD_NONE;
362      u->operand[1].type = UD_NONE;
363      u->mnemonic = UD_Inop;
364    }
365  }
366
367  if (u->mnemonic == UD_Inop && u->pfx_repe) {
368    u->pfx_repe = 0;
369    u->mnemonic = UD_Ipause;
370  }
371  return 0;
372}
373
374
375/* -----------------------------------------------------------------------------
376 * decode_a()- Decodes operands of the type seg:offset
377 * -----------------------------------------------------------------------------
378 */
379static void
380decode_a(struct ud* u, struct ud_operand *op)
381{
382  if (u->opr_mode == 16) {
383    /* seg16:off16 */
384    op->type = UD_OP_PTR;
385    op->size = 32;
386    op->lval.ptr.off = inp_uint16(u);
387    op->lval.ptr.seg = inp_uint16(u);
388  } else {
389    /* seg16:off32 */
390    op->type = UD_OP_PTR;
391    op->size = 48;
392    op->lval.ptr.off = inp_uint32(u);
393    op->lval.ptr.seg = inp_uint16(u);
394  }
395}
396
397/* -----------------------------------------------------------------------------
398 * decode_gpr() - Returns decoded General Purpose Register
399 * -----------------------------------------------------------------------------
400 */
static enum ud_type
decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
{
  /*
   * Map a register number `rm` to a general purpose register id for
   * operand size `s` (bits).
   */
  switch (s) {
    case 64:
        return UD_R_RAX + rm;
    case 32:
        return UD_R_EAX + rm;
    case 16:
        return UD_R_AX  + rm;
    case  8:
        /* in 64-bit mode, any rex prefix remaps encodings 4..7 from
         * ah..bh to the byte registers spl..dil */
        if (u->dis_mode == 64 && u->pfx_rex) {
            if (rm >= 4)
                return UD_R_SPL + (rm-4);
            return UD_R_AL + rm;
        } else return UD_R_AL + rm;
    case 0:
        /* invalid size in case of a decode error */
        UD_ASSERT(u->error);
        return UD_NONE;
    default:
        UD_ASSERT(!"invalid operand size");
        return UD_NONE;
  }
}
426
static void
decode_reg(struct ud *u,
           struct ud_operand *opr,
           int type,       /* REGCLASS_* register class */
           int num,        /* register number within the class */
           int size)       /* abstract or concrete operand size */
{
  /*
   * Fill `opr` as a register operand: resolve the abstract operand
   * size, then map (type, num) to a concrete register id.
   */
  int reg;
  size = resolve_operand_size(u, size);
  switch (type) {
    case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
    case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
    case REGCLASS_XMM :
      /* SZ_QQ selects the ymm bank, otherwise xmm */
      reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0);
      break;
    case REGCLASS_CR : reg = UD_R_CR0  + num; break;
    case REGCLASS_DB : reg = UD_R_DR0  + num; break;
    case REGCLASS_SEG : {
      /*
       * Only 6 segment registers, anything else is an error.
       */
      if ((num & 7) > 5) {
        UDERR(u, "invalid segment register value\n");
        return;
      } else {
        reg = UD_R_ES + (num & 7);
      }
      break;
    }
    default:
      UD_ASSERT(!"invalid register type");
      return;
  }
  opr->type = UD_OP_REG;
  opr->base = reg;
  opr->size = size;
}
464
465
466/*
467 * decode_imm
468 *
469 *    Decode Immediate values.
470 */
471static void
472decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
473{
474  op->size = resolve_operand_size(u, size);
475  op->type = UD_OP_IMM;
476
477  switch (op->size) {
478  case  8: op->lval.sbyte = inp_uint8(u);   break;
479  case 16: op->lval.uword = inp_uint16(u);  break;
480  case 32: op->lval.udword = inp_uint32(u); break;
481  case 64: op->lval.uqword = inp_uint64(u); break;
482  default: return;
483  }
484}
485
486
487/*
488 * decode_mem_disp
489 *
490 *    Decode mem address displacement.
491 */
492static void
493decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
494{
495  switch (size) {
496  case 8:
497    op->offset = 8;
498    op->lval.ubyte  = inp_uint8(u);
499    break;
500  case 16:
501    op->offset = 16;
502    op->lval.uword  = inp_uint16(u);
503    break;
504  case 32:
505    op->offset = 32;
506    op->lval.udword = inp_uint32(u);
507    break;
508  case 64:
509    op->offset = 64;
510    op->lval.uqword = inp_uint64(u);
511    break;
512  default:
513      return;
514  }
515}
516
517
518/*
519 * decode_modrm_reg
520 *
521 *    Decodes reg field of mod/rm byte
522 *
523 */
524static UD_INLINE void
525decode_modrm_reg(struct ud         *u,
526                 struct ud_operand *operand,
527                 unsigned int       type,
528                 unsigned int       size)
529{
530  uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u));
531  decode_reg(u, operand, type, reg, size);
532}
533
534
535/*
536 * decode_modrm_rm
537 *
538 *    Decodes rm field of mod/rm byte
539 *
540 */
static void
decode_modrm_rm(struct ud         *u,
                struct ud_operand *op,
                unsigned char      type,    /* register type */
                unsigned int       size)    /* operand size */

{
  /*
   * Decode the modrm.rm field: a register when mod == 11b, otherwise
   * a memory operand whose form depends on the effective address size
   * (base/index/scale plus an optional displacement).
   */
  size_t offset = 0;  /* displacement width in bits; 0 = none */
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      /* 8-bit displacement follows */
      offset = 8;
    } else if (mod == 2) {
      /* 32-bit displacement follows */
      offset = 32;
    } else if (mod == 0 && (rm & 7) == 5) {
      /* mod=00, rm=101: rip-relative with 32-bit displacement */
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB)
     */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3));
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3));
      /* special conditions for base reference */
      if (op->index == UD_R_RSP) {
        /* index encoding 100 (rsp) means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      } else {
        /* scale 1 is represented as UD_NONE (the & ~1 masks it to 0) */
        op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      }

      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        if (mod == 0) {
          /* mod=00 with base rbp/r13: no base, disp32 only */
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
        op->scale = UD_NONE;
        op->index = UD_NONE;
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && rm == 5) {
      /* mod=00, rm=101: absolute disp32, no base register */
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      /* NOTE(review): this path uses pfx_rex rather than the effective
       * _rex used in the 64-bit path above — confirm intentional */
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        /* index encoding 100 (esp) means "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          /* mod=00 with base ebp: no base, disp32 only */
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else {
    /* 16-bit addressing: fixed base/index pairs selected by rm */
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    op->scale = UD_NONE;
    if (mod == 0 && rm == 6) {
      /* mod=00, rm=110: absolute disp16, no base register */
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  } else {
    op->offset = 0;
  }
}
677
678
679/*
680 * decode_moffset
681 *    Decode offset-only memory operand
682 */
683static void
684decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
685{
686  opr->type  = UD_OP_MEM;
687  opr->base  = UD_NONE;
688  opr->index = UD_NONE;
689  opr->scale = UD_NONE;
690  opr->size  = resolve_operand_size(u, size);
691  decode_mem_disp(u, u->adr_mode, opr);
692}
693
694
695static void
696decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size)
697{
698  uint8_t vvvv;
699  UD_ASSERT(u->vex_op != 0);
700  vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf;
701  decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size);
702}
703
704
705/*
706 * decode_vex_immreg
707 *    Decode source operand encoded in immediate byte [7:4]
708 */
709static int
710decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size)
711{
712  uint8_t imm  = inp_next(u);
713  uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7;
714  UD_RETURN_ON_ERROR(u);
715  UD_ASSERT(u->vex_op != 0);
716  decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size);
717  return 0;
718}
719
720
721/*
722 * decode_operand
723 *
724 *      Decodes a single operand.
725 *      Returns the type of the operand (UD_NONE if none)
726 */
static int
decode_operand(struct ud           *u,
               struct ud_operand   *operand,
               enum ud_operand_code type,
               unsigned int         size)
{
  /*
   * Dispatch on the abstract operand code from the opcode table and
   * fill `operand` accordingly. Returns the decoded operand type
   * (UD_NONE if the code decodes to nothing).
   */
  operand->type = UD_NONE;
  operand->_oprcode = type;

  switch (type) {
    case OP_A :
      /* direct far pointer (seg:off) */
      decode_a(u, operand);
      break;
    case OP_MR:
      /* modrm.rm gpr/mem with distinct reg vs mem sizes */
      decode_modrm_rm(u, operand, REGCLASS_GPR,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_F:
      u->br_far  = 1;
      /* intended fall through */
    case OP_M:
      /* memory-only form: reject register (mod == 11b) encodings */
      if (MODRM_MOD(modrm(u)) == 3) {
        UDERR(u, "expected modrm.mod != 3\n");
      }
      /* intended fall through */
    case OP_E:
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_G:
      /* gpr selected by modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
      break;
    case OP_sI:
    case OP_I:
      decode_imm(u, size, operand);
      break;
    case OP_I1:
      /* implicit constant 1 (e.g. shift-by-1 forms) */
      operand->type = UD_OP_CONST;
      operand->lval.udword = 1;
      break;
    case OP_N:
      /* register-only mmx form: require mod == 11b */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_Q:
      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
      break;
    case OP_P:
      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
      break;
    case OP_U:
      /* register-only xmm form: require mod == 11b */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_W:
      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
      break;
    case OP_V:
      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
      break;
    case OP_H:
      /* xmm/ymm register encoded in vex.vvvv */
      decode_vex_vvvv(u, operand, size);
      break;
    case OP_MU:
      decode_modrm_rm(u, operand, REGCLASS_XMM,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_S:
      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
      break;
    case OP_O:
      /* offset-only memory operand (moffs) */
      decode_moffset(u, size, operand);
      break;
    case OP_R0:
    case OP_R1:
    case OP_R2:
    case OP_R3:
    case OP_R4:
    case OP_R5:
    case OP_R6:
    case OP_R7:
      /* gpr encoded in the opcode byte, extended by rex.b */
      decode_reg(u, operand, REGCLASS_GPR,
                 (REX_B(u->_rex) << 3) | (type - OP_R0), size);
      break;
    case OP_AL:
    case OP_AX:
    case OP_eAX:
    case OP_rAX:
      decode_reg(u, operand, REGCLASS_GPR, 0, size);
      break;
    case OP_CL:
    case OP_CX:
    case OP_eCX:
      decode_reg(u, operand, REGCLASS_GPR, 1, size);
      break;
    case OP_DL:
    case OP_DX:
    case OP_eDX:
      decode_reg(u, operand, REGCLASS_GPR, 2, size);
      break;
    case OP_ES:
    case OP_CS:
    case OP_DS:
    case OP_SS:
    case OP_FS:
    case OP_GS:
      /* in 64bits mode, only fs and gs are allowed */
      if (u->dis_mode == 64) {
        if (type != OP_FS && type != OP_GS) {
          UDERR(u, "invalid segment register in 64bits\n");
        }
      }
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ES) + UD_R_ES;
      operand->size = 16;
      break;
    case OP_J :
      /* relative branch target: an immediate reinterpreted as jimm */
      decode_imm(u, size, operand);
      operand->type = UD_OP_JIMM;
      break ;
    case OP_R :
      /* register-only gpr form: require mod == 11b */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_C:
      decode_modrm_reg(u, operand, REGCLASS_CR, size);
      break;
    case OP_D:
      decode_modrm_reg(u, operand, REGCLASS_DB, size);
      break;
    case OP_I3 :
      /* implicit constant 3 (int3-style encodings) */
      operand->type = UD_OP_CONST;
      operand->lval.sbyte = 3;
      break;
    case OP_ST0:
    case OP_ST1:
    case OP_ST2:
    case OP_ST3:
    case OP_ST4:
    case OP_ST5:
    case OP_ST6:
    case OP_ST7:
      /* x87 stack registers, 80-bit */
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ST0) + UD_R_ST0;
      operand->size = 80;
      break;
    case OP_L:
      /* register encoded in an immediate byte's high nibble */
      decode_vex_immreg(u, operand, size);
      break;
    default :
      operand->type = UD_NONE;
      break;
  }
  return operand->type;
}
887
888
889/*
890 * decode_operands
891 *
 *    Disassemble up to 4 operands of the current instruction being
893 *    disassembled. By the end of the function, the operand fields
894 *    of the ud structure will have been filled.
895 */
896static int
897decode_operands(struct ud* u)
898{
899  decode_operand(u, &u->operand[0],
900                    u->itab_entry->operand1.type,
901                    u->itab_entry->operand1.size);
902  if (u->operand[0].type != UD_NONE) {
903      decode_operand(u, &u->operand[1],
904                        u->itab_entry->operand2.type,
905                        u->itab_entry->operand2.size);
906  }
907  if (u->operand[1].type != UD_NONE) {
908      decode_operand(u, &u->operand[2],
909                        u->itab_entry->operand3.type,
910                        u->itab_entry->operand3.size);
911  }
912  if (u->operand[2].type != UD_NONE) {
913      decode_operand(u, &u->operand[3],
914                        u->itab_entry->operand4.type,
915                        u->itab_entry->operand4.size);
916  }
917  return 0;
918}
919
920/* -----------------------------------------------------------------------------
921 * clear_insn() - clear instruction structure
922 * -----------------------------------------------------------------------------
923 */
924static void
925clear_insn(register struct ud* u)
926{
927  u->error     = 0;
928  u->pfx_seg   = 0;
929  u->pfx_opr   = 0;
930  u->pfx_adr   = 0;
931  u->pfx_lock  = 0;
932  u->pfx_repne = 0;
933  u->pfx_rep   = 0;
934  u->pfx_repe  = 0;
935  u->pfx_rex   = 0;
936  u->pfx_str   = 0;
937  u->mnemonic  = UD_Inone;
938  u->itab_entry = NULL;
939  u->have_modrm = 0;
940  u->br_far    = 0;
941  u->vex_op    = 0;
942  u->_rex      = 0;
943  u->operand[0].type = UD_NONE;
944  u->operand[1].type = UD_NONE;
945  u->operand[2].type = UD_NONE;
946  u->operand[3].type = UD_NONE;
947}
948
949
950static UD_INLINE int
951resolve_pfx_str(struct ud* u)
952{
953  if (u->pfx_str == 0xf3) {
954    if (P_STR(u->itab_entry->prefix)) {
955        u->pfx_rep  = 0xf3;
956    } else {
957        u->pfx_repe = 0xf3;
958    }
959  } else if (u->pfx_str == 0xf2) {
960    u->pfx_repne = 0xf3;
961  }
962  return 0;
963}
964
965
static int
resolve_mode( struct ud* u )
{
  /*
   * Compute the effective rex bits, operand size and address size for
   * the decoded instruction, based on the disassembly mode, prefixes
   * and the opcode table entry. Returns 0 on success, -1 on error.
   */
  int default64;
  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */

    /* Check validity of  instruction m64 */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64bits\n");
      return -1;
    }

    /* compute effective rex based on,
     *  - vex prefix (if any)
     *  - rex prefix (if any, and not vex)
     *  - allowed prefixes specified by the opcode map
     */
    if (u->vex_op == 0xc4) {
        /* vex has rex.rxb in 1's complement */
        u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ |
                   ((u->vex_b2  >> 4) & 0x8) /* rex.w000 */);
    } else if (u->vex_op == 0xc5) {
        /* vex has rex.r in 1's complement */
        u->_rex = (~(u->vex_b1 >> 5)) & 4;
    } else {
        UD_ASSERT(u->vex_op == 0);
        u->_rex = u->pfx_rex;
    }
    /* mask away rex bits the opcode map does not allow */
    u->_rex &= REX_PFX_MASK(u->itab_entry->prefix);

    /* whether this instruction has a default operand size of
     * 64bit, also hardcoded into the opcode map.
     */
    default64 = P_DEF64( u->itab_entry->prefix );
    /* calculate effective operand size */
    if (REX_W(u->_rex)) {
        u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
        u->opr_mode = 16;
    } else {
        /* unless the default opr size of instruction is 64,
         * the effective operand size in the absence of rex.w
         * prefix is 32.
         */
        u->opr_mode = default64 ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  return 0;
}
1029
1030
1031static UD_INLINE int
1032decode_insn(struct ud *u, uint16_t ptr)
1033{
1034  UD_ASSERT((ptr & 0x8000) == 0);
1035  u->itab_entry = &ud_itab[ ptr ];
1036  u->mnemonic = u->itab_entry->mnemonic;
1037  return (resolve_pfx_str(u)  == 0 &&
1038          resolve_mode(u)     == 0 &&
1039          decode_operands(u)  == 0 &&
1040          resolve_mnemonic(u) == 0) ? 0 : -1;
1041}
1042
1043
1044/*
1045 * decode_3dnow()
1046 *
1047 *    Decoding 3dnow is a little tricky because of its strange opcode
1048 *    structure. The final opcode disambiguation depends on the last
1049 *    byte that comes after the operands have been decoded. Fortunately,
1050 *    all 3dnow instructions have the same set of operand types. So we
1051 *    go ahead and decode the instruction by picking an arbitrarily chosen
1052 *    valid entry in the table, decode the operands, and read the final
 *    byte to resolve the mnemonic.
1054 */
1055static UD_INLINE int
1056decode_3dnow(struct ud* u)
1057{
1058  uint16_t ptr;
1059  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
1060  UD_ASSERT(u->le->table[0xc] != 0);
1061  decode_insn(u, u->le->table[0xc]);
1062  inp_next(u);
1063  if (u->error) {
1064    return -1;
1065  }
1066  ptr = u->le->table[inp_curr(u)];
1067  UD_ASSERT((ptr & 0x8000) == 0);
1068  u->mnemonic = ud_itab[ptr].mnemonic;
1069  return 0;
1070}
1071
1072
1073static int
1074decode_ssepfx(struct ud *u)
1075{
1076  uint8_t idx;
1077  uint8_t pfx;
1078
1079  /*
1080   * String prefixes (f2, f3) take precedence over operand
1081   * size prefix (66).
1082   */
1083  pfx = u->pfx_str;
1084  if (pfx == 0) {
1085    pfx = u->pfx_opr;
1086  }
1087  idx = ((pfx & 0xf) + 1) / 2;
1088  if (u->le->table[idx] == 0) {
1089    idx = 0;
1090  }
1091  if (idx && u->le->table[idx] != 0) {
1092    /*
1093     * "Consume" the prefix as a part of the opcode, so it is no
1094     * longer exported as an instruction prefix.
1095     */
1096    u->pfx_str = 0;
1097    if (pfx == 0x66) {
1098        /*
1099         * consume "66" only if it was used for decoding, leaving
1100         * it to be used as an operands size override for some
1101         * simd instructions.
1102         */
1103        u->pfx_opr = 0;
1104    }
1105  }
1106  return decode_ext(u, u->le->table[idx]);
1107}
1108
1109
static int
decode_vex(struct ud *u)
{
  /*
   * Decode a vex (c4/c5) prefix and select the matching table slot,
   * indexed by the implied simd prefix (pp) and opcode map (m-mmmm)
   * fields. Outside 64-bit mode, a c4/c5 byte followed by modrm.mod
   * != 11b is not vex at all (presumably the legacy lds/les
   * encodings, routed through table slot 0 — confirm against the
   * opcode map).
   */
  uint8_t index;
  if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) {
    index = 0;
  } else {
    u->vex_op = inp_curr(u);
    u->vex_b1 = inp_next(u);
    if (u->vex_op == 0xc4) {
      uint8_t pp, m;
      /* 3-byte vex */
      u->vex_b2 = inp_next(u);
      UD_RETURN_ON_ERROR(u);
      m  = u->vex_b1 & 0x1f;
      /* only maps 1..3 are defined */
      if (m == 0 || m > 3) {
        UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value");
      }
      pp = u->vex_b2 & 0x3;
      index = (pp << 2) | m;
    } else {
      /* 2-byte vex: implied map 1, pp from byte 1 */
      UD_ASSERT(u->vex_op == 0xc5);
      index = 0x1 | ((u->vex_b1 & 0x3) << 2);
    }
  }
  return decode_ext(u, u->le->table[index]);
}
1138
1139
1140/*
1141 * decode_ext()
1142 *
1143 *    Decode opcode extensions (if any)
1144 */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  /*
   * Walk the opcode table graph. A `ptr` with the high bit clear is a
   * leaf (an instruction table entry); otherwise it indexes another
   * lookup table, whose selector depends on the table type.
   */
  uint8_t idx = 0;
  if ((ptr & 0x8000) == 0) {
    return decode_insn(u, ptr);
  }
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
    case UD_TAB__OPC_MOD:
      /* !11 = 0, 11 = 1 */
      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
      break;
      /* disassembly mode/operand size/address size based tables.
       * 16 = 0,, 32 = 1, 64 = 2
       */
    case UD_TAB__OPC_MODE:
      idx = u->dis_mode != 64 ? 0 : 1;
      break;
    case UD_TAB__OPC_OSIZE:
      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
      break;
    case UD_TAB__OPC_ASIZE:
      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
      break;
    case UD_TAB__OPC_X87:
      /* x87 tables are indexed by the modrm byte itself (0xc0 based) */
      idx = modrm(u) - 0xC0;
      break;
    case UD_TAB__OPC_VENDOR:
      /* slot 0 = amd, slot 1 = intel; ANY picks whichever is valid */
      if (u->vendor == UD_VENDOR_ANY) {
        /* choose a valid entry */
        idx = (u->le->table[idx] != 0) ? 0 : 1;
      } else if (u->vendor == UD_VENDOR_AMD) {
        idx = 0;
      } else {
        idx = 1;
      }
      break;
    case UD_TAB__OPC_RM:
      idx = MODRM_RM(modrm(u));
      break;
    case UD_TAB__OPC_REG:
      idx = MODRM_REG(modrm(u));
      break;
    case UD_TAB__OPC_SSE:
      return decode_ssepfx(u);
    case UD_TAB__OPC_VEX:
      return decode_vex(u);
    case UD_TAB__OPC_VEX_W:
      idx = vex_w(u);
      break;
    case UD_TAB__OPC_VEX_L:
      idx = vex_l(u);
      break;
    case UD_TAB__OPC_TABLE:
      /* multi-byte opcode: consume the next opcode byte and recurse */
      inp_next(u);
      return decode_opcode(u);
    default:
      UD_ASSERT(!"not reached");
      break;
  }

  return decode_ext(u, u->le->table[idx]);
}
1213
1214
1215static int
1216decode_opcode(struct ud *u)
1217{
1218  uint16_t ptr;
1219  UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1220  UD_RETURN_ON_ERROR(u);
1221  ptr = u->le->table[inp_curr(u)];
1222  return decode_ext(u, ptr);
1223}
1224
1225
1226/* =============================================================================
1227 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1228 * =============================================================================
1229 */
unsigned int
ud_decode(struct ud *u)
{
  /* Decode one instruction from the current input position. On any
   * failure, the state is reset and the bytes map to the invalid
   * instruction (itab entry 0). */
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0];
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;
  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

    /* maybe this stray segment override byte
     * should be spewed out?
     */
    /* drop a segment override that applies to no memory operand */
    if ( !P_SEG( u->itab_entry->prefix ) &&
            u->operand[0].type != UD_OP_MEM &&
            u->operand[1].type != UD_OP_MEM )
        u->pfx_seg = 0;

  u->insn_offset = u->pc; /* set offset of instruction */
  u->asm_buf_fill = 0;   /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}
1263
1264/*
1265vim: set ts=2 sw=2 expandtab
1266*/
1267