;uInt longest_match_x64(
;    deflate_state *s,
;    IPos cur_match);                             /* current match */

; gvmat64.asm -- Asm portion of the optimized longest_match for x64 (AMD64)
; Copyright (C) 1995-2005 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
;
; File written by Gilles Vollant, by converting to assembly the longest_match
;  from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
;
;  and by taking inspiration on asm686 with masm, optimised assembly code
;        from Brian Raiter, written 1998
;
;         http://www.zlib.net
;         http://www.winimage.com/zLibDll
;         http://www.muppetlabs.com/~breadbox/software/assembly.html
;
; to compile this file for infozip Zip, I use option:
;   ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm
;
; to compile this file for zLib, I use option:
;   ml64.exe /Flgvmat64 /c /Zi gvmat64.asm
; Be careful to adapt zlib1222add below to your version of zLib
;   (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change
;    value of zlib1222add later)
;
; This file compiles with Microsoft Macro Assembler (x64) for AMD64
;
;   ml64.exe is given with Visual Studio 2005 and Windows 2003 server DDK
;
;   (you can get Windows 2003 server DDK with ml64 and cl for AMD64 from
;      http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price)
;


;uInt longest_match(s, cur_match)
;    deflate_state *s;
;    IPos cur_match;                             /* current match */
;
; Register interface as used by the code below:
;   zlib build    : rcx = s (deflate_state *), edx = cur_match
;   INFOZIP build : ecx = cur_match (state lives in COMM globals)
;   result        : eax = match length
;   rdi, rsi, rbx, rbp, r12 and r13 are saved on entry and restored on exit.
.code
longest_match PROC


;LocalVarsSize equ 88
 LocalVarsSize equ 72

; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12
; free register :  r14,r15
; register can be saved : rsp

; NOTE(review): every slot below resolves to an address BELOW rsp
; (rsp + n - LocalVarsSize with n < LocalVarsSize) and rsp itself is never
; adjusted.  The Microsoft x64 ABI defines no red zone -- confirm that
; storing below rsp is acceptable for the targets this file is built for.

 chainlenwmask   equ  rsp + 8 - LocalVarsSize    ; high word: current chain len
                                                 ; low word: s->wmask
;window          equ  rsp + xx - LocalVarsSize   ; local copy of s->window ; stored in r10
;windowbestlen   equ  rsp + xx - LocalVarsSize   ; s->window + bestlen , use r10+r11
;scanstart       equ  rsp + xx - LocalVarsSize   ; first two bytes of string ; stored in r12w
;scanend         equ  rsp + xx - LocalVarsSize   ; last two bytes of string use ebx
;scanalign       equ  rsp + xx - LocalVarsSize   ; dword-misalignment of string r13
;bestlen         equ  rsp + xx - LocalVarsSize   ; size of best match so far -> r11d
;scan            equ  rsp + xx - LocalVarsSize   ; ptr to string wanting match -> r9
IFDEF INFOZIP
ELSE
 nicematch       equ  (rsp + 16 - LocalVarsSize) ; a good enough match size
ENDIF

save_rdi        equ  rsp + 24 - LocalVarsSize
save_rsi        equ  rsp + 32 - LocalVarsSize
save_rbx        equ  rsp + 40 - LocalVarsSize
save_rbp        equ  rsp + 48 - LocalVarsSize
save_r12        equ  rsp + 56 - LocalVarsSize
save_r13        equ  rsp + 64 - LocalVarsSize
;save_r14        equ  rsp + 72 - LocalVarsSize
;save_r15        equ  rsp + 80 - LocalVarsSize



;  all the +4 offsets are due to the addition of pending_buf_size (in zlib
;  in the deflate_state structure since the asm code was first written
;  (if you compile with zlib 1.0.4 or older, remove the +4).
;  Note : these values are good with an 8 bytes boundary pack structure


    MAX_MATCH           equ     258
    MIN_MATCH           equ     3
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)


;;; Offsets for fields in the deflate_state structure. These numbers
;;; are calculated from the definition of deflate_state, with the
;;; assumption that the compiler will dword-align the fields. (Thus,
;;; changing the definition of deflate_state could easily cause this
;;; program to crash horribly, without so much as a warning at
;;; compile time. Sigh.)

;  all the +zlib1222add offsets are due to the addition of fields
;  in zlib in the deflate_state structure since the asm code was first written
;  (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
;  (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
;  (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").


IFDEF INFOZIP

_DATA   SEGMENT
COMM    window_size:DWORD
; WMask ; 7fff
COMM    window:BYTE:010040H
COMM    prev:WORD:08000H
; MatchLen : unused
; PrevMatch : unused
COMM    strstart:DWORD
COMM    match_start:DWORD
; Lookahead : ignore
COMM    prev_length:DWORD ; PrevLen
COMM    max_chain_length:DWORD
COMM    good_match:DWORD
COMM    nice_match:DWORD
prev_ad equ OFFSET prev
window_ad equ OFFSET window
nicematch equ nice_match
_DATA ENDS
WMask equ 07fffh

ELSE

  IFNDEF zlib1222add
    zlib1222add equ 8
  ENDIF
dsWSize          equ 56+zlib1222add+(zlib1222add/2)
dsWMask          equ 64+zlib1222add+(zlib1222add/2)
dsWindow         equ 72+zlib1222add
dsPrev           equ 88+zlib1222add
dsMatchLen       equ 128+zlib1222add
dsPrevMatch      equ 132+zlib1222add
dsStrStart       equ 140+zlib1222add
dsMatchStart     equ 144+zlib1222add
dsLookahead      equ 148+zlib1222add
dsPrevLen        equ 152+zlib1222add
dsMaxChainLen    equ 156+zlib1222add
dsGoodMatch      equ 172+zlib1222add
dsNiceMatch      equ 176+zlib1222add

window_size      equ [ rcx + dsWSize]
WMask            equ [ rcx + dsWMask]
window_ad        equ [ rcx + dsWindow]
prev_ad          equ [ rcx + dsPrev]
strstart         equ [ rcx + dsStrStart]
match_start      equ [ rcx + dsMatchStart]
Lookahead        equ [ rcx + dsLookahead] ; 0ffffffffh on infozip
prev_length      equ [ rcx + dsPrevLen]
max_chain_length equ [ rcx + dsMaxChainLen]
good_match       equ [ rcx + dsGoodMatch]
nice_match       equ [ rcx + dsNiceMatch]
ENDIF

; parameter 1 in rcx (deflate_state *s), param 2 in edx (cur_match)

; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
;
; All registers must be preserved across the call, except for
;   rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.



;;; Save the registers that the caller may be using.  (rsp is not
;;; adjusted; the save slots are addressed relative to the entry rsp.)


;;; Retrieve the function arguments. r8d will hold cur_match
;;; throughout the entire function. In the zlib build rcx keeps the
;;; pointer to the deflate_state structure (all field equates above are
;;; rcx-relative).

; parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)

; the 32-bit move below clears the high 32 bits of r8, which can be
; garbage in both r8 and rdx

        mov     [save_rdi], rdi
        mov     [save_rsi], rsi
        mov     [save_rbx], rbx
        mov     [save_rbp], rbp
IFDEF INFOZIP
        mov     r8d, ecx
ELSE
        mov     r8d, edx
ENDIF
        mov     [save_r12], r12
        mov     [save_r13], r13
;       mov     [save_r14], r14
;       mov     [save_r15], r15


;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) {
;;;     chain_length >>= 2;
;;; }

        mov     edi, prev_length
        mov     esi, good_match
        mov     eax, WMask
        mov     ebx, max_chain_length
        cmp     edi, esi
        jl      LastMatchGood
        shr     ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the function can
;;; use the sign flag instead of the zero flag for the exit test.
;;; It is then shifted into the high word, to make room for the wmask
;;; value, which it will always accompany.

        dec     ebx
        shl     ebx, 16
        or      ebx, eax

;;; on zlib only
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

IFDEF INFOZIP
        mov     [chainlenwmask], ebx
; on infozip nice_match = [nice_match]
ELSE
        mov     eax, nice_match
        mov     [chainlenwmask], ebx
        mov     r10d, Lookahead
        cmp     r10d, eax
        cmovnl  r10d, eax               ; r10d = smaller of lookahead, nice_match
        mov     [nicematch], r10d
ENDIF

;;; register Bytef *scan = s->window + s->strstart;
        mov     r10, window_ad
        mov     ebp, strstart
        lea     r13, [r10 + rbp]

;;; Determine how many bytes the scan ptr is off from being
;;; dword-aligned.

        mov     r9, r13                 ; r9 = scan
        neg     r13
        and     r13, 3                  ; r13 = scanalign = (-scan) & 3

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
IFDEF INFOZIP
        mov     eax, 07efah             ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(3+8+1))
ELSE
        mov     eax, window_size
        sub     eax, MIN_LOOKAHEAD
ENDIF
        xor     edi, edi
        sub     ebp, eax

        mov     r11d, prev_length       ; mov leaves the flags of the sub intact

        cmovng  ebp, edi                ; limit = 0 when strstart <= MAX_DIST

;;; int best_len = s->prev_length;   (kept in r11d, loaded above)


;;; Keep the sum s->window + best_len in rsi.

        lea     rsi, [r10 + r11]

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx   r12d, word ptr [r9]
        movzx   ebx, word ptr [r9 + r11 - 1]

        mov     rdi, prev_ad

;;; Jump into the main loop.

        mov     edx, [chainlenwmask]

        cmp     bx, word ptr [rsi + r8 - 1]
        jz      LookupLoopIsZero

LookupLoop1:
        and     r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp     r8d, ebp
        jbe     LeaveNow
        sub     edx, 00010000h
        js      LeaveNow

LoopEntry1:
        cmp     bx, word ptr [rsi + r8 - 1]
        jz      LookupLoopIsZero

LookupLoop2:
        and     r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp     r8d, ebp
        jbe     LeaveNow
        sub     edx, 00010000h
        js      LeaveNow

LoopEntry2:
        cmp     bx, word ptr [rsi + r8 - 1]
        jz      LookupLoopIsZero

LookupLoop4:
        and     r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp     r8d, ebp
        jbe     LeaveNow
        sub     edx, 00010000h
        js      LeaveNow

LoopEntry4:

        cmp     bx, word ptr [rsi + r8 - 1]
        jnz     LookupLoop1
        jmp     LookupLoopIsZero


;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Here is the inner loop of the function. The function will spend the
;;; majority of its time in this loop, and majority of that time will
;;; be spent in the first ten instructions.
;;;
;;; Within this loop:
;;; ebx = scanend
;;; r8d = curmatch
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
;;; rsi = windowbestlen - i.e., (window + bestlen)
;;; rdi = prev
;;; ebp = limit

LookupLoop:
        and     r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp     r8d, ebp
        jbe     LeaveNow
        sub     edx, 00010000h
        js      LeaveNow

LoopEntry:

        cmp     bx, word ptr [rsi + r8 - 1]
        jnz     LookupLoop1
LookupLoopIsZero:
        cmp     r12w, word ptr [r10 + r8]
        jnz     LookupLoop1


;;; Store the current value of chainlen.
        mov     [chainlenwmask], edx

;;; Point rdi to the string under scrutiny, and rsi to the string we
;;; are hoping to match it up with. In actuality, rsi and rdi are
;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and rdx is
;;; initialized to -(MAX_MATCH_8), so [reg + rdx] starts scanalign
;;; bytes into each string.

        lea     rsi, [r8 + r10]
        mov     rdx, 0fffffffffffffef8h ; -(MAX_MATCH_8)
        lea     rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8]
        lea     rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8]

        prefetcht1 [rsi + rdx]
        prefetcht1 [rdi + rdx]


;;; Test the strings for equality, 8 bytes at a time. At the end,
;;; adjust rdx so that it is offset to the exact byte that mismatched.
;;;
;;; We already know at this point that the first three bytes of the
;;; strings match each other, and they can be safely passed over before
;;; starting the compare loop. So what this code does is skip over 0-3
;;; bytes, as much as necessary in order to dword-align the rdi
;;; pointer. (rsi will still be misaligned three times out of four.)
;;;
;;; It should be confessed that this loop usually does not represent
;;; much of the total running time. Replacing it with a more
;;; straightforward "rep cmpsb" would not drastically degrade
;;; performance.


LoopCmps:
        mov     rax, [rsi + rdx]
        xor     rax, [rdi + rdx]
        jnz     LeaveLoopCmps

        mov     rax, [rsi + rdx + 8]
        xor     rax, [rdi + rdx + 8]
        jnz     LeaveLoopCmps8


        mov     rax, [rsi + rdx + 8+8]
        xor     rax, [rdi + rdx + 8+8]
        jnz     LeaveLoopCmps16

        add     rdx, 8+8+8

;;; rdx climbs from -(MAX_MATCH_8) = -264 in steps of 24 and reaches
;;; exactly zero once the whole compare window has been checked. The
;;; loop must stop there: continuing unconditionally would keep reading
;;; past the window for as long as the two strings stay identical.
        jnz     short LoopCmps
        jmp     LenMaximum              ; all MAX_MATCH_8 bytes matched

LeaveLoopCmps16: add rdx, 8
LeaveLoopCmps8:  add rdx, 8
LeaveLoopCmps:

;;; rax holds the XOR of the two mismatching qwords; locate its lowest
;;; non-zero byte and advance rdx by that byte's index.

        test    eax, 0000FFFFh
        jnz     LenLower                ; difference in bytes 0-1

        test    eax, 0ffffffffh

        jnz     LenLower32              ; difference in bytes 2-3

        add     rdx, 4
        shr     rax, 32
        or      ax, ax
        jnz     LenLower                ; difference in bytes 4-5

LenLower32:
        shr     eax, 16
        add     rdx, 2
LenLower:
        sub     al, 1                   ; carry set only when the low byte was 0
        adc     rdx, 0                  ; ...in which case the mismatch is one byte on

;;; Calculate the length of the match. If it is longer than MAX_MATCH,
;;; then automatically accept it as the best possible match and leave.

        lea     rax, [rdi + rdx]
        sub     rax, r9
        cmp     eax, MAX_MATCH
        jge     LenMaximum

;;; If the length of the match is not longer than the best match we
;;; have so far, then forget it and return to the lookup loop.
;///////////////////////////////////

        cmp     eax, r11d
        jg      LongerMatch

        lea     rsi, [r10 + r11]

        mov     rdi, prev_ad
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;;         s->match_start = cur_match;
;;;         best_len = len;
;;;         if (len >= nice_match) break;
;;;         scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov     r11d, eax
        mov     match_start, r8d
        cmp     eax, [nicematch]
        jge     LeaveNow

        lea     rsi, [r10 + rax]

        movzx   ebx, word ptr [r9 + rax - 1]
        mov     rdi, prev_ad
        mov     edx, [chainlenwmask]
        jmp     LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov     r11d, MAX_MATCH
        mov     match_start, r8d

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
IFDEF INFOZIP
        mov     eax, r11d
ELSE
        mov     eax, Lookahead
        cmp     r11d, eax
        cmovng  eax, r11d
ENDIF

;;; Restore the saved registers and return from whence we came.


        mov     rsi, [save_rsi]
        mov     rdi, [save_rdi]
        mov     rbx, [save_rbx]
        mov     rbp, [save_rbp]
        mov     r12, [save_r12]
        mov     r13, [save_r13]
;       mov     r14, [save_r14]
;       mov     r15, [save_r15]


        ret     0
; please don't remove this string !
; You can freely use gvmat64 in any free or commercial app
; but it is far better not to remove the string from the binary!
    db     0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0
longest_match   ENDP

; match_init: no set-up is performed; returns immediately.
match_init PROC
        ret     0
match_init ENDP


END