1
2%include "Tools.inc"
3
4segment_code
5
;
; void  Adapt ( short* pM, const short* pAdapt, int nDirection, int nOrder )
;
;   Updates the 16-bit words at pM in place from the words at pAdapt:
;
;       nDirection  > 0     pM[i] += pAdapt[i]
;       nDirection  < 0     pM[i] -= pAdapt[i]
;       nDirection == 0     pM is left untouched
;
;   The arithmetic is done per word with paddw / psubw, so results wrap
;   rather than saturate.
;
;   Words are handled in groups of 16 (32 bytes per pass).  The group count
;   is nOrder >> 4 (logical shift), so a remainder of nOrder modulo 16 is
;   not processed, and an nOrder below 16 is a no-op.
;
;   [esp+16]    nOrder
;   [esp+12]    nDirection
;   [esp+ 8]    pAdapt
;   [esp+ 4]    pM
;   [esp+ 0]    Return Address
;
;   Registers:  eax = pM cursor, ecx = pAdapt cursor, edx = groups left,
;               mm0-mm3 = scratch.  emms is issued on every path that
;               touched an MMX register.
;

            ; Padding in front of the entry point: the 24 bytes of setup code
            ; below then end on a 16-byte boundary, which is where the add
            ; loop starts.  (This assumes the `proc' macro emits no
            ; instruction bytes of its own.)  The padding is never executed.
            align 16
            times 8 nop

proc        Adapt

            mov   eax, [esp +  4]               ; pM
            mov   ecx, [esp +  8]               ; pAdapt
            mov   edx, [esp + 16]               ; nOrder
            shr   edx, 4                        ; edx = number of 16-word groups
            jz    short AdaptReturn             ; no full group: without this the
                                                ; dec/jnz loops below would wrap
                                                ; and run 2^32 times

            cmp   dword [esp + 12], byte 0      ; nDirection
            jle   short AdaptSub                ; <= 0: AdaptSub re-tests for == 0

AdaptAddLoop:
            movq  mm0, [eax]
            paddw mm0, [ecx]
            movq  [eax], mm0
            movq  mm1, [eax + 8]
            paddw mm1, [ecx + 8]
            movq  [eax + 8], mm1
            movq  mm2, [eax + 16]
            paddw mm2, [ecx + 16]
            movq  [eax + 16], mm2
            movq  mm3, [eax + 24]
            paddw mm3, [ecx + 24]
            movq  [eax + 24], mm3
            add   eax, byte 32                  ; advance both cursors by 16 words
            add   ecx, byte 32
            dec   edx
            jnz   AdaptAddLoop

            emms
AdaptReturn:                                    ; also the early exit for a zero
            ret                                 ; group count (no MMX state used)

            ; Unreachable padding: together with the 2-byte `je' at AdaptSub
            ; it places AdaptSubLoop on a 16-byte boundary.
            align 16
            times 14 nop

AdaptSub:   je    short AdaptDone               ; flags still come from the cmp
                                                ; above: nDirection == 0, nothing
                                                ; to do and no MMX state to clear

AdaptSubLoop:
            movq  mm0, [eax]
            psubw mm0, [ecx]
            movq  [eax], mm0
            movq  mm1, [eax + 8]
            psubw mm1, [ecx + 8]
            movq  [eax + 8], mm1
            movq  mm2, [eax + 16]
            psubw mm2, [ecx + 16]
            movq  [eax + 16], mm2
            movq  mm3, [eax + 24]
            psubw mm3, [ecx + 24]
            movq  [eax + 24], mm3
            add   eax, byte 32                  ; advance both cursors by 16 words
            add   ecx, byte 32
            dec   edx
            jnz   AdaptSubLoop

            emms
AdaptDone:
            ; No explicit ret here: as in the rest of this file, the return is
            ; left to the `endproc' macro (defined in Tools.inc).

endproc
97
;
; int  CalculateDotProduct ( const short* pA, const short* pB, int nOrder )
;
;   Returns in eax the sum of pA[i] * pB[i] over the processed words.
;   Products are formed by pmaddwd (signed 16 x 16 -> 32 bit, adjacent
;   pairs summed) and accumulated with paddd, so the total wraps modulo
;   2^32.
;
;   Words are handled in groups of 16 (32 bytes per pass).  The group count
;   is nOrder >> 4 (logical shift), so a remainder of nOrder modulo 16 is
;   not processed, and an nOrder below 16 yields 0.
;
;   [esp+12]    nOrder
;   [esp+ 8]    pB
;   [esp+ 4]    pA
;   [esp+ 0]    Return Address
;
;   Registers:  eax = pA cursor (then the result), ecx = pB cursor,
;               edx = groups left, mm0-mm3 = scratch,
;               mm7 = two 32-bit partial sums, mm6 = final reduction.
;   The stack arguments are only read, never written.
;

            ; Padding in front of the entry point: the 20 bytes of setup code
            ; below then end on a 16-byte boundary, which is where loopDot
            ; starts.  (This assumes the `proc' macro emits no instruction
            ; bytes of its own.)  The padding is never executed.
            align   16
            times   12 nop

proc        CalculateDotProduct

            mov     eax, [esp +  4]             ; pA
            mov     ecx, [esp +  8]             ; pB
            mov     edx, [esp + 12]             ; nOrder
            pxor    mm7, mm7                    ; both partial sums = 0
            shr     edx, 4                      ; edx = number of 16-word groups
            jz      short DotReduce             ; no full group: result is 0, and
                                                ; the dec/jnz loop is not entered
                                                ; with a count that would wrap

loopDot:    movq    mm0, [eax]
            pmaddwd mm0, [ecx]
            paddd   mm7, mm0
            movq    mm1, [eax +  8]
            pmaddwd mm1, [ecx +  8]
            paddd   mm7, mm1
            movq    mm2, [eax + 16]
            pmaddwd mm2, [ecx + 16]
            paddd   mm7, mm2
            movq    mm3, [eax + 24]
            pmaddwd mm3, [ecx + 24]
            add     eax, byte 32                ; advance both cursors by 16 words
            add     ecx, byte 32
            paddd   mm7, mm3
            dec     edx
            jnz     loopDot

DotReduce:  movq    mm6, mm7                    ; fold the high partial sum
            psrlq   mm7, 32                     ; onto the low one
            paddd   mm6, mm7
            movd    eax, mm6                    ; low dword = return value
            emms
            ; No explicit ret here: as in the rest of this file, the return is
            ; left to the `endproc' macro (defined in Tools.inc).
endproc
154
155
;
; BOOL GetMMXAvailable ( void );
;
;   Returns 1 in eax when the CPU reports MMX support, otherwise 0.
;
;   Step 1: try to flip bit 21 of EFLAGS (the ID flag).  If the bit reads
;           back unchanged, the CPUID instruction is not available and the
;           answer is 0.
;   Step 2: run CPUID with eax = 1 and test bit 23 of edx (the MMX flag).
;
;   All general-purpose registers except eax are preserved
;   (pushad / popad).
;

proc        GetMMXAvailable
            pushad                              ; CPUID overwrites eax, ebx, ecx, edx

            pushfd
            pop     edx                         ; edx = EFLAGS as found
            mov     eax, edx
            xor     eax, 1 << 21                ; flip the ID flag
            push    eax
            popfd                               ; try to write it back ...
            pushfd
            pop     eax                         ; ... and see what stuck
            cmp     eax, edx
            jz      short MMXCheckDone          ; unchanged: no CPUID, so no MMX
                                                ; (ZF = 1 becomes the result 0)

            mov     eax, 1                      ; CPUID leaf 1: feature flags
            cpuid
            test    edx, 1 << 23                ; ZF = 0 when the MMX bit is set

MMXCheckDone:
            popad                               ; leaves EFLAGS alone, so ZF survives
            setnz   al                          ; al = 1 only if the MMX bit was set
            movzx   eax, al                     ; widen to a clean 0 / 1 in eax
            ; No explicit ret here: as in the rest of this file, the return is
            ; left to the `endproc' macro (defined in Tools.inc).
endproc
180
181            end
182