1/* -----------------------------------------------------------------------------
2 * decode.c
3 *
4 * Copyright (c) 2005, 2006, Vivek Mohan <vivek@sig9.com>
5 * All rights reserved. See LICENSE
6 * -----------------------------------------------------------------------------
7 */
8
9#include <assert.h>
10#include <string.h>
11
12#include "types.h"
13#include "itab.h"
14#include "input.h"
15#include "decode.h"
16#include "extern.h"
17
/* The max number of prefixes to an instruction.
 * NOTE(review): not referenced in the visible part of this file; get_prefixes()
 * bounds its scan with MAX_INSN_LENGTH instead - confirm whether this is still
 * needed.
 */
#define MAX_PREFIXES    15

/* Stand-alone table entries with no operands and no prefix flags.
 * search_itab() returns ie_pause / ie_nop directly for opcode 0x90 instead of
 * looking the byte up in ud_itab_list.
 */
static struct ud_itab_entry ie_invalid = { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
static struct ud_itab_entry ie_pause   = { UD_Ipause,   O_NONE, O_NONE, O_NONE, P_none };
static struct ud_itab_entry ie_nop     = { UD_Inop,     O_NONE, O_NONE, O_NONE, P_none };
24
25
26/* Looks up mnemonic code in the mnemonic string table
27 * Returns NULL if the mnemonic code is invalid
28 */
29const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
30{
31    if ( c < UD_Id3vil )
32        return ud_mnemonics_str[ c ];
33    return NULL;
34}
35
36
37/* Extracts instruction prefixes.
38 */
39static int get_prefixes( struct ud* u )
40{
41    unsigned int have_pfx = 1;
42    unsigned int i;
43    uint8_t curr;
44
45    /* if in error state, bail out */
46    if ( u->error )
47        return -1;
48
49    /* keep going as long as there are prefixes available */
50    for ( i = 0; have_pfx ; ++i ) {
51
52        /* Get next byte. */
53        inp_next(u);
54        if ( u->error )
55            return -1;
56        curr = inp_curr( u );
57
58        /* rex prefixes in 64bit mode */
59        if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
60            u->pfx_rex = curr;
61        } else {
62            switch ( curr )
63            {
64            case 0x2E :
65                u->pfx_seg = UD_R_CS;
66                u->pfx_rex = 0;
67                break;
68            case 0x36 :
69                u->pfx_seg = UD_R_SS;
70                u->pfx_rex = 0;
71                break;
72            case 0x3E :
73                u->pfx_seg = UD_R_DS;
74                u->pfx_rex = 0;
75                break;
76            case 0x26 :
77                u->pfx_seg = UD_R_ES;
78                u->pfx_rex = 0;
79                break;
80            case 0x64 :
81                u->pfx_seg = UD_R_FS;
82                u->pfx_rex = 0;
83                break;
84            case 0x65 :
85                u->pfx_seg = UD_R_GS;
86                u->pfx_rex = 0;
87                break;
88            case 0x67 : /* adress-size override prefix */
89                u->pfx_adr = 0x67;
90                u->pfx_rex = 0;
91                break;
92            case 0xF0 :
93                u->pfx_lock = 0xF0;
94                u->pfx_rex  = 0;
95                break;
96            case 0x66:
97                /* the 0x66 sse prefix is only effective if no other sse prefix
98                 * has already been specified.
99                 */
100                if ( !u->pfx_insn ) u->pfx_insn = 0x66;
101                u->pfx_opr = 0x66;
102                u->pfx_rex = 0;
103                break;
104            case 0xF2:
105                u->pfx_insn  = 0xF2;
106                u->pfx_repne = 0xF2;
107                u->pfx_rex   = 0;
108                break;
109            case 0xF3:
110                u->pfx_insn = 0xF3;
111                u->pfx_rep  = 0xF3;
112                u->pfx_repe = 0xF3;
113                u->pfx_rex  = 0;
114                break;
115            default :
116                /* No more prefixes */
117                have_pfx = 0;
118                break;
119            }
120        }
121
122        /* check if we reached max instruction length */
123        if ( i + 1 == MAX_INSN_LENGTH ) {
124            u->error = 1;
125            break;
126        }
127    }
128
129    /* return status */
130    if ( u->error )
131        return -1;
132
133    /* rewind back one byte in stream, since the above loop
134     * stops with a non-prefix byte.
135     */
136    inp_back(u);
137
138    /* speculatively determine the effective operand mode,
139     * based on the prefixes and the current disassembly
140     * mode. This may be inaccurate, but useful for mode
141     * dependent decoding.
142     */
143    if ( u->dis_mode == 64 ) {
144        u->opr_mode = REX_W( u->pfx_rex ) ? 64 : ( ( u->pfx_opr ) ? 16 : 32 ) ;
145        u->adr_mode = ( u->pfx_adr ) ? 32 : 64;
146    } else if ( u->dis_mode == 32 ) {
147        u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
148        u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
149    } else if ( u->dis_mode == 16 ) {
150        u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
151        u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
152    }
153
154    return 0;
155}
156
157
/* Searches the instruction tables for the right entry and stores it in
 * u->itab_entry (and its mnemonic in u->mnemonic).
 *
 * The lookup starts in the 1-byte table, or in one of the 0x0F tables when
 * the first opcode byte is 0x0F. An entry whose mnemonic is not below
 * UD_Id3vil is a group marker: its prefix field names the next table to
 * consult and the kind of group decides how the index into that table is
 * formed. The walk repeats until an ordinary entry is reached.
 *
 * Returns 0 once an entry has been selected (an invalid entry counts too),
 * -1 if u->error is set on entry or after reading an input byte.
 */
static int search_itab( struct ud * u )
{
    struct ud_itab_entry * e = NULL;
    enum ud_itab_index table;
    uint8_t peek;
    uint8_t did_peek = 0;   /* 1 when the latest index came from inp_peek() */
    uint8_t curr;
    uint8_t index;

    /* if in state of error, return */
    if ( u->error )
        return -1;

    /* get first byte of opcode. */
    inp_next(u);
    if ( u->error )
        return -1;
    curr = inp_curr(u);

    /* resolve xchg, nop, pause craziness: 0x90 is answered directly with
     * pause (when a rep prefix was seen; the prefix is then dropped) or nop,
     * unless rex.b is set in 64bit mode, in which case the normal table
     * lookup below is used.
     */
    if ( 0x90 == curr ) {
        if ( !( u->dis_mode == 64 && REX_B( u->pfx_rex ) ) ) {
            if ( u->pfx_rep ) {
                u->pfx_rep = 0;
                e = & ie_pause;
            } else {
                e = & ie_nop;
            }
            goto found_entry;
        }
    }

    /* get top-level table */
    if ( 0x0F == curr ) {
        table = ITAB__0F;
        curr  = inp_next(u);
        if ( u->error )
            return -1;

        /* 2byte opcodes can be modified by 0x66, F3, and F2 prefixes.
         * The prefixed table is only chosen when it holds a valid entry for
         * this opcode; the matching pfx_* field(s) are cleared in that case.
         */
        if ( 0x66 == u->pfx_insn ) {
            if ( ud_itab_list[ ITAB__PFX_SSE66__0F ][ curr ].mnemonic != UD_Iinvalid ) {
                table = ITAB__PFX_SSE66__0F;
                u->pfx_opr = 0;
            }
        } else if ( 0xF2 == u->pfx_insn ) {
            if ( ud_itab_list[ ITAB__PFX_SSEF2__0F ][ curr ].mnemonic != UD_Iinvalid ) {
                table = ITAB__PFX_SSEF2__0F;
                u->pfx_repne = 0;
            }
        } else if ( 0xF3 == u->pfx_insn ) {
            if ( ud_itab_list[ ITAB__PFX_SSEF3__0F ][ curr ].mnemonic != UD_Iinvalid ) {
                table = ITAB__PFX_SSEF3__0F;
                u->pfx_repe = 0;
                u->pfx_rep  = 0;
            }
        }
    /* pick an instruction from the 1byte table */
    } else {
        table = ITAB__1BYTE;
    }

    index = curr;

search:

    e = & ud_itab_list[ table ][ index ];

    /* if mnemonic constant is a standard instruction constant
     * our search is over.
     */

    if ( e->mnemonic < UD_Id3vil ) {
        if ( e->mnemonic == UD_Iinvalid ) {
            /* An invalid entry reached through a peeked byte: read one more
             * byte before reporting it (presumably to consume the byte that
             * was only peeked at - confirm against inp_peek()).
             */
            if ( did_peek ) {
                inp_next( u ); if ( u->error ) return -1;
            }
            goto found_entry;
        }
        goto found_entry;
    }

    /* group marker: its prefix field holds the id of the next table */
    table = e->prefix;

    switch ( e->mnemonic )
    {
    /* index by the reg field of the upcoming (peeked) byte */
    case UD_Igrp_reg:
        peek     = inp_peek( u );
        did_peek = 1;
        index    = MODRM_REG( peek );
        break;

    /* index by whether the mod field of the upcoming (peeked) byte is 11b */
    case UD_Igrp_mod:
        peek     = inp_peek( u );
        did_peek = 1;
        index    = MODRM_MOD( peek );
        if ( index == 3 )
           index = ITAB__MOD_INDX__11;
        else
           index = ITAB__MOD_INDX__NOT_11;
        break;

    /* index by the r/m field of the next byte, which is read here */
    case UD_Igrp_rm:
        curr     = inp_next( u );
        did_peek = 0;
        if ( u->error )
            return -1;
        index    = MODRM_RM( curr );
        break;

    /* index by the next byte, which is read here, relative to 0xC0 */
    case UD_Igrp_x87:
        curr     = inp_next( u );
        did_peek = 0;
        if ( u->error )
            return -1;
        index    = curr - 0xC0;
        break;

    /* index by effective operand size */
    case UD_Igrp_osize:
        if ( u->opr_mode == 64 )
            index = ITAB__MODE_INDX__64;
        else if ( u->opr_mode == 32 )
            index = ITAB__MODE_INDX__32;
        else
            index = ITAB__MODE_INDX__16;
        break;

    /* index by effective address size */
    case UD_Igrp_asize:
        if ( u->adr_mode == 64 )
            index = ITAB__MODE_INDX__64;
        else if ( u->adr_mode == 32 )
            index = ITAB__MODE_INDX__32;
        else
            index = ITAB__MODE_INDX__16;
        break;

    /* index by disassembly mode */
    case UD_Igrp_mode:
        if ( u->dis_mode == 64 )
            index = ITAB__MODE_INDX__64;
        else if ( u->dis_mode == 32 )
            index = ITAB__MODE_INDX__32;
        else
            index = ITAB__MODE_INDX__16;
        break;

    /* index by cpu vendor; with assertions compiled out an unknown vendor
     * leaves index unchanged.
     */
    case UD_Igrp_vendor:
        if ( u->vendor == UD_VENDOR_INTEL )
            index = ITAB__VENDOR_INDX__INTEL;
        else if ( u->vendor == UD_VENDOR_AMD )
            index = ITAB__VENDOR_INDX__AMD;
        else
            assert( !"unrecognized vendor id" );
        break;

    case UD_Id3vil:
        assert( !"invalid instruction mnemonic constant Id3vil" );
        break;

    default:
        assert( !"invalid instruction mnemonic constant" );
        break;
    }

    /* look up the entry selected by the new table/index pair */
    goto search;

found_entry:

    u->itab_entry = e;
    u->mnemonic = u->itab_entry->mnemonic;

    return 0;
}
332
333
334static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
335{
336    switch ( s )
337    {
338    case SZ_V:
339        return ( u->opr_mode );
340    case SZ_Z:
341        return ( u->opr_mode == 16 ) ? 16 : 32;
342    case SZ_P:
343        return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
344    case SZ_MDQ:
345        return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
346    case SZ_RDQ:
347        return ( u->dis_mode == 64 ) ? 64 : 32;
348    default:
349        return s;
350    }
351}
352
353
354static int resolve_mnemonic( struct ud* u )
355{
356  /* far/near flags */
357  u->br_far = 0;
358  u->br_near = 0;
359  /* readjust operand sizes for call/jmp instrcutions */
360  if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
361    /* WP: 16bit pointer */
362    if ( u->operand[ 0 ].size == SZ_WP ) {
363        u->operand[ 0 ].size = 16;
364        u->br_far = 1;
365        u->br_near= 0;
366    /* DP: 32bit pointer */
367    } else if ( u->operand[ 0 ].size == SZ_DP ) {
368        u->operand[ 0 ].size = 32;
369        u->br_far = 1;
370        u->br_near= 0;
371    } else {
372        u->br_far = 0;
373        u->br_near= 1;
374    }
375  /* resolve 3dnow weirdness. */
376  } else if ( u->mnemonic == UD_I3dnow ) {
377    u->mnemonic = ud_itab_list[ ITAB__3DNOW ][ inp_curr( u )  ].mnemonic;
378  }
379  /* SWAPGS is only valid in 64bits mode */
380  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
381    u->error = 1;
382    return -1;
383  }
384
385  return 0;
386}
387
388
389/* -----------------------------------------------------------------------------
390 * decode_a()- Decodes operands of the type seg:offset
391 * -----------------------------------------------------------------------------
392 */
393static void
394decode_a(struct ud* u, struct ud_operand *op)
395{
396  if (u->opr_mode == 16) {
397    /* seg16:off16 */
398    op->type = UD_OP_PTR;
399    op->size = 32;
400    op->lval.ptr.off = inp_uint16(u);
401    op->lval.ptr.seg = inp_uint16(u);
402  } else {
403    /* seg16:off32 */
404    op->type = UD_OP_PTR;
405    op->size = 48;
406    op->lval.ptr.off = inp_uint32(u);
407    op->lval.ptr.seg = inp_uint16(u);
408  }
409}
410
411/* -----------------------------------------------------------------------------
412 * decode_gpr() - Returns decoded General Purpose Register
413 * -----------------------------------------------------------------------------
414 */
415static enum ud_type
416decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
417{
418  s = resolve_operand_size(u, s);
419
420  switch (s) {
421    case 64:
422        return UD_R_RAX + rm;
423    case SZ_DP:
424    case 32:
425        return UD_R_EAX + rm;
426    case SZ_WP:
427    case 16:
428        return UD_R_AX  + rm;
429    case  8:
430        if (u->dis_mode == 64 && u->pfx_rex) {
431            if (rm >= 4)
432                return UD_R_SPL + (rm-4);
433            return UD_R_AL + rm;
434        } else return UD_R_AL + rm;
435    default:
436        return 0;
437  }
438}
439
440/* -----------------------------------------------------------------------------
441 * resolve_gpr64() - 64bit General Purpose Register-Selection.
442 * -----------------------------------------------------------------------------
443 */
444static enum ud_type
445resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op)
446{
447  if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
448    gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
449  else  gpr_op = (gpr_op - OP_rAX);
450
451  if (u->opr_mode == 16)
452    return gpr_op + UD_R_AX;
453  if (u->dis_mode == 32 ||
454    (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
455    return gpr_op + UD_R_EAX;
456  }
457
458  return gpr_op + UD_R_RAX;
459}
460
461/* -----------------------------------------------------------------------------
462 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
463 * -----------------------------------------------------------------------------
464 */
465static enum ud_type
466resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
467{
468  gpr_op = gpr_op - OP_eAX;
469
470  if (u->opr_mode == 16)
471    return gpr_op + UD_R_AX;
472
473  return gpr_op +  UD_R_EAX;
474}
475
476/* -----------------------------------------------------------------------------
477 * resolve_reg() - Resolves the register type
478 * -----------------------------------------------------------------------------
479 */
480static enum ud_type
481resolve_reg(struct ud* u, unsigned int type, unsigned char i)
482{
483  switch (type) {
484    case T_MMX :    return UD_R_MM0  + (i & 7);
485    case T_XMM :    return UD_R_XMM0 + i;
486    case T_CRG :    return UD_R_CR0  + i;
487    case T_DBG :    return UD_R_DR0  + i;
488    case T_SEG :    return UD_R_ES   + (i & 7);
489    case T_NONE:
490    default:    return UD_NONE;
491  }
492}
493
494/* -----------------------------------------------------------------------------
495 * decode_imm() - Decodes Immediate values.
496 * -----------------------------------------------------------------------------
497 */
498static void
499decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
500{
501  op->size = resolve_operand_size(u, s);
502  op->type = UD_OP_IMM;
503
504  switch (op->size) {
505    case  8: op->lval.sbyte = inp_uint8(u);   break;
506    case 16: op->lval.uword = inp_uint16(u);  break;
507    case 32: op->lval.udword = inp_uint32(u); break;
508    case 64: op->lval.uqword = inp_uint64(u); break;
509    default: return;
510  }
511}
512
513/* -----------------------------------------------------------------------------
514 * decode_modrm() - Decodes ModRM Byte
515 * -----------------------------------------------------------------------------
516 */
517static void
518decode_modrm(struct ud* u, struct ud_operand *op, unsigned int s,
519         unsigned char rm_type, struct ud_operand *opreg,
520         unsigned char reg_size, unsigned char reg_type)
521{
522  unsigned char mod, rm, reg;
523
524  inp_next(u);
525
526  /* get mod, r/m and reg fields */
527  mod = MODRM_MOD(inp_curr(u));
528  rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(inp_curr(u));
529  reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(inp_curr(u));
530
531  op->size = resolve_operand_size(u, s);
532
533  /* if mod is 11b, then the UD_R_m specifies a gpr/mmx/sse/control/debug */
534  if (mod == 3) {
535    op->type = UD_OP_REG;
536    if (rm_type ==  T_GPR)
537        op->base = decode_gpr(u, op->size, rm);
538    else    op->base = resolve_reg(u, rm_type, (REX_B(u->pfx_rex) << 3) | (rm&7));
539  }
540  /* else its memory addressing */
541  else {
542    op->type = UD_OP_MEM;
543
544    /* 64bit addressing */
545    if (u->adr_mode == 64) {
546
547        op->base = UD_R_RAX + rm;
548
549        /* get offset type */
550        if (mod == 1)
551            op->offset = 8;
552        else if (mod == 2)
553            op->offset = 32;
554        else if (mod == 0 && (rm & 7) == 5) {
555            op->base = UD_R_RIP;
556            op->offset = 32;
557        } else  op->offset = 0;
558
559        /* Scale-Index-Base (SIB) */
560        if ((rm & 7) == 4) {
561            inp_next(u);
562
563            op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
564            op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
565            op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
566
567            /* special conditions for base reference */
568            if (op->index == UD_R_RSP) {
569                op->index = UD_NONE;
570                op->scale = UD_NONE;
571            }
572
573            if (op->base == UD_R_RBP || op->base == UD_R_R13) {
574                if (mod == 0)
575                    op->base = UD_NONE;
576                if (mod == 1)
577                    op->offset = 8;
578                else op->offset = 32;
579            }
580        }
581    }
582
583    /* 32-Bit addressing mode */
584    else if (u->adr_mode == 32) {
585
586        /* get base */
587        op->base = UD_R_EAX + rm;
588
589        /* get offset type */
590        if (mod == 1)
591            op->offset = 8;
592        else if (mod == 2)
593            op->offset = 32;
594        else if (mod == 0 && rm == 5) {
595            op->base = UD_NONE;
596            op->offset = 32;
597        } else  op->offset = 0;
598
599        /* Scale-Index-Base (SIB) */
600        if ((rm & 7) == 4) {
601            inp_next(u);
602
603            op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
604            op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
605            op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
606
607            if (op->index == UD_R_ESP) {
608                op->index = UD_NONE;
609                op->scale = UD_NONE;
610            }
611
612            /* special condition for base reference */
613            if (op->base == UD_R_EBP) {
614                if (mod == 0)
615                    op->base = UD_NONE;
616                if (mod == 1)
617                    op->offset = 8;
618                else op->offset = 32;
619            }
620        }
621    }
622
623    /* 16bit addressing mode */
624    else  {
625        switch (rm) {
626            case 0: op->base = UD_R_BX; op->index = UD_R_SI; break;
627            case 1: op->base = UD_R_BX; op->index = UD_R_DI; break;
628            case 2: op->base = UD_R_BP; op->index = UD_R_SI; break;
629            case 3: op->base = UD_R_BP; op->index = UD_R_DI; break;
630            case 4: op->base = UD_R_SI; break;
631            case 5: op->base = UD_R_DI; break;
632            case 6: op->base = UD_R_BP; break;
633            case 7: op->base = UD_R_BX; break;
634        }
635
636        if (mod == 0 && rm == 6) {
637            op->offset= 16;
638            op->base = UD_NONE;
639        }
640        else if (mod == 1)
641            op->offset = 8;
642        else if (mod == 2)
643            op->offset = 16;
644    }
645  }
646
647  /* extract offset, if any */
648  switch(op->offset) {
649    case 8 : op->lval.ubyte  = inp_uint8(u);  break;
650    case 16: op->lval.uword  = inp_uint16(u);  break;
651    case 32: op->lval.udword = inp_uint32(u); break;
652    case 64: op->lval.uqword = inp_uint64(u); break;
653    default: break;
654  }
655
656  /* resolve register encoded in reg field */
657  if (opreg) {
658    opreg->type = UD_OP_REG;
659    opreg->size = resolve_operand_size(u, reg_size);
660    if (reg_type == T_GPR)
661        opreg->base = decode_gpr(u, opreg->size, reg);
662    else opreg->base = resolve_reg(u, reg_type, reg);
663  }
664}
665
666/* -----------------------------------------------------------------------------
667 * decode_o() - Decodes offset
668 * -----------------------------------------------------------------------------
669 */
670static void
671decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
672{
673  switch (u->adr_mode) {
674    case 64:
675        op->offset = 64;
676        op->lval.uqword = inp_uint64(u);
677        break;
678    case 32:
679        op->offset = 32;
680        op->lval.udword = inp_uint32(u);
681        break;
682    case 16:
683        op->offset = 16;
684        op->lval.uword  = inp_uint16(u);
685        break;
686    default:
687        return;
688  }
689  op->type = UD_OP_MEM;
690  op->size = resolve_operand_size(u, s);
691}
692
693/* -----------------------------------------------------------------------------
694 * disasm_operands() - Disassembles Operands.
695 * -----------------------------------------------------------------------------
696 */
697static int disasm_operands(register struct ud* u)
698{
699
700
701  /* mopXt = map entry, operand X, type; */
702  enum ud_operand_code mop1t = u->itab_entry->operand1.type;
703  enum ud_operand_code mop2t = u->itab_entry->operand2.type;
704  enum ud_operand_code mop3t = u->itab_entry->operand3.type;
705
706  /* mopXs = map entry, operand X, size */
707  unsigned int mop1s = u->itab_entry->operand1.size;
708  unsigned int mop2s = u->itab_entry->operand2.size;
709  unsigned int mop3s = u->itab_entry->operand3.size;
710
711  /* iop = instruction operand */
712  register struct ud_operand* iop = u->operand;
713
714  switch(mop1t) {
715
716    case OP_A :
717        decode_a(u, &(iop[0]));
718        break;
719
720    /* M[b] ... */
721    case OP_M :
722        if (MODRM_MOD(inp_peek(u)) == 3)
723            u->error= 1;
724    /* E, G/P/V/I/CL/1/S */
725    case OP_E :
726        if (mop2t == OP_G) {
727            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_GPR);
728            if (mop3t == OP_I)
729                decode_imm(u, mop3s, &(iop[2]));
730            else if (mop3t == OP_CL) {
731                iop[2].type = UD_OP_REG;
732                iop[2].base = UD_R_CL;
733                iop[2].size = 8;
734            }
735        }
736        else if (mop2t == OP_P)
737            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_MMX);
738        else if (mop2t == OP_V)
739            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_XMM);
740        else if (mop2t == OP_S)
741            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_SEG);
742        else {
743            decode_modrm(u, &(iop[0]), mop1s, T_GPR, NULL, 0, T_NONE);
744            if (mop2t == OP_CL) {
745                iop[1].type = UD_OP_REG;
746                iop[1].base = UD_R_CL;
747                iop[1].size = 8;
748            } else if (mop2t == OP_I1) {
749                iop[1].type = UD_OP_CONST;
750                u->operand[1].lval.udword = 1;
751            } else if (mop2t == OP_I) {
752                decode_imm(u, mop2s, &(iop[1]));
753            }
754        }
755        break;
756
757    /* G, E/PR[,I]/VR */
758    case OP_G :
759        if (mop2t == OP_M) {
760            if (MODRM_MOD(inp_peek(u)) == 3)
761                u->error= 1;
762            decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
763        } else if (mop2t == OP_E) {
764            decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_GPR);
765            if (mop3t == OP_I)
766                decode_imm(u, mop3s, &(iop[2]));
767        } else if (mop2t == OP_PR) {
768            decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_GPR);
769            if (mop3t == OP_I)
770                decode_imm(u, mop3s, &(iop[2]));
771        } else if (mop2t == OP_VR) {
772            if (MODRM_MOD(inp_peek(u)) != 3)
773                u->error = 1;
774            decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
775        } else if (mop2t == OP_W)
776            decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_GPR);
777        break;
778
779    /* AL..BH, I/O/DX */
780    case OP_AL : case OP_CL : case OP_DL : case OP_BL :
781    case OP_AH : case OP_CH : case OP_DH : case OP_BH :
782
783        iop[0].type = UD_OP_REG;
784        iop[0].base = UD_R_AL + (mop1t - OP_AL);
785        iop[0].size = 8;
786
787        if (mop2t == OP_I)
788            decode_imm(u, mop2s, &(iop[1]));
789        else if (mop2t == OP_DX) {
790            iop[1].type = UD_OP_REG;
791            iop[1].base = UD_R_DX;
792            iop[1].size = 16;
793        }
794        else if (mop2t == OP_O)
795            decode_o(u, mop2s, &(iop[1]));
796        break;
797
798    /* rAX[r8]..rDI[r15], I/rAX..rDI/O */
799    case OP_rAXr8 : case OP_rCXr9 : case OP_rDXr10 : case OP_rBXr11 :
800    case OP_rSPr12: case OP_rBPr13: case OP_rSIr14 : case OP_rDIr15 :
801    case OP_rAX : case OP_rCX : case OP_rDX : case OP_rBX :
802    case OP_rSP : case OP_rBP : case OP_rSI : case OP_rDI :
803
804        iop[0].type = UD_OP_REG;
805        iop[0].base = resolve_gpr64(u, mop1t);
806
807        if (mop2t == OP_I)
808            decode_imm(u, mop2s, &(iop[1]));
809        else if (mop2t >= OP_rAX && mop2t <= OP_rDI) {
810            iop[1].type = UD_OP_REG;
811            iop[1].base = resolve_gpr64(u, mop2t);
812        }
813        else if (mop2t == OP_O) {
814            decode_o(u, mop2s, &(iop[1]));
815            iop[0].size = resolve_operand_size(u, mop2s);
816        }
817        break;
818
819    /* AL[r8b]..BH[r15b], I */
820    case OP_ALr8b : case OP_CLr9b : case OP_DLr10b : case OP_BLr11b :
821    case OP_AHr12b: case OP_CHr13b: case OP_DHr14b : case OP_BHr15b :
822    {
823        ud_type_t gpr = (mop1t - OP_ALr8b) + UD_R_AL +
824                        (REX_B(u->pfx_rex) << 3);
825        if (UD_R_AH <= gpr && u->pfx_rex)
826            gpr = gpr + 4;
827        iop[0].type = UD_OP_REG;
828        iop[0].base = gpr;
829        if (mop2t == OP_I)
830            decode_imm(u, mop2s, &(iop[1]));
831        break;
832    }
833
834    /* eAX..eDX, DX/I */
835    case OP_eAX : case OP_eCX : case OP_eDX : case OP_eBX :
836    case OP_eSP : case OP_eBP : case OP_eSI : case OP_eDI :
837        iop[0].type = UD_OP_REG;
838        iop[0].base = resolve_gpr32(u, mop1t);
839        if (mop2t == OP_DX) {
840            iop[1].type = UD_OP_REG;
841            iop[1].base = UD_R_DX;
842            iop[1].size = 16;
843        } else if (mop2t == OP_I)
844            decode_imm(u, mop2s, &(iop[1]));
845        break;
846
847    /* ES..GS */
848    case OP_ES : case OP_CS : case OP_DS :
849    case OP_SS : case OP_FS : case OP_GS :
850
851        /* in 64bits mode, only fs and gs are allowed */
852        if (u->dis_mode == 64)
853            if (mop1t != OP_FS && mop1t != OP_GS)
854                u->error= 1;
855        iop[0].type = UD_OP_REG;
856        iop[0].base = (mop1t - OP_ES) + UD_R_ES;
857        iop[0].size = 16;
858
859        break;
860
861    /* J */
862    case OP_J :
863        decode_imm(u, mop1s, &(iop[0]));
864        iop[0].type = UD_OP_JIMM;
865        break ;
866
867    /* PR, I */
868    case OP_PR:
869        if (MODRM_MOD(inp_peek(u)) != 3)
870            u->error = 1;
871        decode_modrm(u, &(iop[0]), mop1s, T_MMX, NULL, 0, T_NONE);
872        if (mop2t == OP_I)
873            decode_imm(u, mop2s, &(iop[1]));
874        break;
875
876    /* VR, I */
877    case OP_VR:
878        if (MODRM_MOD(inp_peek(u)) != 3)
879            u->error = 1;
880        decode_modrm(u, &(iop[0]), mop1s, T_XMM, NULL, 0, T_NONE);
881        if (mop2t == OP_I)
882            decode_imm(u, mop2s, &(iop[1]));
883        break;
884
885    /* P, Q[,I]/W/E[,I],VR */
886    case OP_P :
887        if (mop2t == OP_Q) {
888            decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_MMX);
889            if (mop3t == OP_I)
890                decode_imm(u, mop3s, &(iop[2]));
891        } else if (mop2t == OP_W) {
892            decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
893        } else if (mop2t == OP_VR) {
894            if (MODRM_MOD(inp_peek(u)) != 3)
895                u->error = 1;
896            decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_MMX);
897        } else if (mop2t == OP_E) {
898            decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_MMX);
899            if (mop3t == OP_I)
900                decode_imm(u, mop3s, &(iop[2]));
901        }
902        break;
903
904    /* R, C/D */
905    case OP_R :
906        if (mop2t == OP_C)
907            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_CRG);
908        else if (mop2t == OP_D)
909            decode_modrm(u, &(iop[0]), mop1s, T_GPR, &(iop[1]), mop2s, T_DBG);
910        break;
911
912    /* C, R */
913    case OP_C :
914        decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_CRG);
915        break;
916
917    /* D, R */
918    case OP_D :
919        decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_DBG);
920        break;
921
922    /* Q, P */
923    case OP_Q :
924        decode_modrm(u, &(iop[0]), mop1s, T_MMX, &(iop[1]), mop2s, T_MMX);
925        break;
926
927    /* S, E */
928    case OP_S :
929        decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_SEG);
930        break;
931
932    /* W, V */
933    case OP_W :
934        decode_modrm(u, &(iop[0]), mop1s, T_XMM, &(iop[1]), mop2s, T_XMM);
935        break;
936
937    /* V, W[,I]/Q/M/E */
938    case OP_V :
939        if (mop2t == OP_W) {
940            /* special cases for movlps and movhps */
941            if (MODRM_MOD(inp_peek(u)) == 3) {
942                if (u->mnemonic == UD_Imovlps)
943                    u->mnemonic = UD_Imovhlps;
944                else
945                if (u->mnemonic == UD_Imovhps)
946                    u->mnemonic = UD_Imovlhps;
947            }
948            decode_modrm(u, &(iop[1]), mop2s, T_XMM, &(iop[0]), mop1s, T_XMM);
949            if (mop3t == OP_I)
950                decode_imm(u, mop3s, &(iop[2]));
951        } else if (mop2t == OP_Q)
952            decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
953        else if (mop2t == OP_M) {
954            if (MODRM_MOD(inp_peek(u)) == 3)
955                u->error= 1;
956            decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
957        } else if (mop2t == OP_E) {
958            decode_modrm(u, &(iop[1]), mop2s, T_GPR, &(iop[0]), mop1s, T_XMM);
959        } else if (mop2t == OP_PR) {
960            decode_modrm(u, &(iop[1]), mop2s, T_MMX, &(iop[0]), mop1s, T_XMM);
961        }
962        break;
963
964    /* DX, eAX/AL */
965    case OP_DX :
966        iop[0].type = UD_OP_REG;
967        iop[0].base = UD_R_DX;
968        iop[0].size = 16;
969
970        if (mop2t == OP_eAX) {
971            iop[1].type = UD_OP_REG;
972            iop[1].base = resolve_gpr32(u, mop2t);
973        } else if (mop2t == OP_AL) {
974            iop[1].type = UD_OP_REG;
975            iop[1].base = UD_R_AL;
976            iop[1].size = 8;
977        }
978
979        break;
980
981    /* I, I/AL/eAX */
982    case OP_I :
983        decode_imm(u, mop1s, &(iop[0]));
984        if (mop2t == OP_I)
985            decode_imm(u, mop2s, &(iop[1]));
986        else if (mop2t == OP_AL) {
987            iop[1].type = UD_OP_REG;
988            iop[1].base = UD_R_AL;
989            iop[1].size = 16;
990        } else if (mop2t == OP_eAX) {
991            iop[1].type = UD_OP_REG;
992            iop[1].base = resolve_gpr32(u, mop2t);
993        }
994        break;
995
996    /* O, AL/eAX */
997    case OP_O :
998        decode_o(u, mop1s, &(iop[0]));
999        iop[1].type = UD_OP_REG;
1000        iop[1].size = resolve_operand_size(u, mop1s);
1001        if (mop2t == OP_AL)
1002            iop[1].base = UD_R_AL;
1003        else if (mop2t == OP_eAX)
1004            iop[1].base = resolve_gpr32(u, mop2t);
1005        else if (mop2t == OP_rAX)
1006            iop[1].base = resolve_gpr64(u, mop2t);
1007        break;
1008
1009    /* 3 */
1010    case OP_I3 :
1011        iop[0].type = UD_OP_CONST;
1012        iop[0].lval.sbyte = 3;
1013        break;
1014
1015    /* ST(n), ST(n) */
1016    case OP_ST0 : case OP_ST1 : case OP_ST2 : case OP_ST3 :
1017    case OP_ST4 : case OP_ST5 : case OP_ST6 : case OP_ST7 :
1018
1019        iop[0].type = UD_OP_REG;
1020        iop[0].base = (mop1t-OP_ST0) + UD_R_ST0;
1021        iop[0].size = 0;
1022
1023        if (mop2t >= OP_ST0 && mop2t <= OP_ST7) {
1024            iop[1].type = UD_OP_REG;
1025            iop[1].base = (mop2t-OP_ST0) + UD_R_ST0;
1026            iop[1].size = 0;
1027        }
1028        break;
1029
1030    /* AX */
1031    case OP_AX:
1032        iop[0].type = UD_OP_REG;
1033        iop[0].base = UD_R_AX;
1034        iop[0].size = 16;
1035        break;
1036
1037    /* none */
1038    default :
1039        iop[0].type = iop[1].type = iop[2].type = UD_NONE;
1040  }
1041
1042  return 0;
1043}
1044
1045/* -----------------------------------------------------------------------------
1046 * clear_insn() - clear instruction pointer
1047 * -----------------------------------------------------------------------------
1048 */
1049static int clear_insn(register struct ud* u)
1050{
1051  u->error     = 0;
1052  u->pfx_seg   = 0;
1053  u->pfx_opr   = 0;
1054  u->pfx_adr   = 0;
1055  u->pfx_lock  = 0;
1056  u->pfx_repne = 0;
1057  u->pfx_rep   = 0;
1058  u->pfx_repe  = 0;
1059  u->pfx_seg   = 0;
1060  u->pfx_rex   = 0;
1061  u->pfx_insn  = 0;
1062  u->mnemonic  = UD_Inone;
1063  u->itab_entry = NULL;
1064
1065  memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
1066  memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
1067  memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
1068
1069  return 0;
1070}
1071
1072static int do_mode( struct ud* u )
1073{
1074  /* if in error state, bail out */
1075  if ( u->error ) return -1;
1076
1077  /* propagate perfix effects */
1078  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
1079
1080    /* Check validity of  instruction m64 */
1081    if ( P_INV64( u->itab_entry->prefix ) ) {
1082        u->error = 1;
1083        return -1;
1084    }
1085
1086    /* effective rex prefix is the  effective mask for the
1087     * instruction hard-coded in the opcode map.
1088     */
1089    u->pfx_rex = ( u->pfx_rex & 0x40 ) |
1090                 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
1091
1092    /* whether this instruction has a default operand size of
1093     * 64bit, also hardcoded into the opcode map.
1094     */
1095    u->default64 = P_DEF64( u->itab_entry->prefix );
1096    /* calculate effective operand size */
1097    if ( REX_W( u->pfx_rex ) ) {
1098        u->opr_mode = 64;
1099    } else if ( u->pfx_opr ) {
1100        u->opr_mode = 16;
1101    } else {
1102        /* unless the default opr size of instruction is 64,
1103         * the effective operand size in the absence of rex.w
1104         * prefix is 32.
1105         */
1106        u->opr_mode = ( u->default64 ) ? 64 : 32;
1107    }
1108
1109    /* calculate effective address size */
1110    u->adr_mode = (u->pfx_adr) ? 32 : 64;
1111  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
1112    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
1113    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
1114  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
1115    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
1116    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
1117  }
1118
1119  /* These flags determine which operand to apply the operand size
1120   * cast to.
1121   */
1122  u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
1123  u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
1124  u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
1125
1126  /* set flags for implicit addressing */
1127  u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
1128
1129  return 0;
1130}
1131
1132static int gen_hex( struct ud *u )
1133{
1134  unsigned int i;
1135  unsigned char *src_ptr = inp_sess( u );
1136  char* src_hex;
1137
1138  /* bail out if in error stat. */
1139  if ( u->error ) return -1;
1140  /* output buffer pointe */
1141  src_hex = ( char* ) u->insn_hexcode;
1142  /* for each byte used to decode instruction */
1143  for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
1144    sprintf( src_hex, "%02x", *src_ptr & 0xFF );
1145    src_hex += 2;
1146  }
1147  return 0;
1148}
1149
1150/* =============================================================================
1151 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1152 * =============================================================================
1153 */
1154unsigned int ud_decode( struct ud* u )
1155{
1156  inp_start(u);
1157
1158  if ( clear_insn( u ) ) {
1159    ; /* error */
1160  } else if ( get_prefixes( u ) != 0 ) {
1161    ; /* error */
1162  } else if ( search_itab( u ) != 0 ) {
1163    ; /* error */
1164  } else if ( do_mode( u ) != 0 ) {
1165    ; /* error */
1166  } else if ( disasm_operands( u ) != 0 ) {
1167    ; /* error */
1168  } else if ( resolve_mnemonic( u ) != 0 ) {
1169    ; /* error */
1170  }
1171
1172  /* Handle decode error. */
1173  if ( u->error ) {
1174    /* clear out the decode data. */
1175    clear_insn( u );
1176    /* mark the sequence of bytes as invalid. */
1177    u->itab_entry = & ie_invalid;
1178    u->mnemonic = u->itab_entry->mnemonic;
1179  }
1180
1181  u->insn_offset = u->pc; /* set offset of instruction */
1182  u->insn_fill = 0;   /* set translation buffer index to 0 */
1183  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1184  gen_hex( u );       /* generate hex code */
1185
1186  /* return number of bytes disassembled. */
1187  return u->inp_ctr;
1188}
1189
1190/* vim:cindent
1191 * vim:ts=4
1192 * vim:sw=4
1193 * vim:expandtab
1194 */
1195