;uInt longest_match_x64(
;    deflate_state *s,
;    IPos cur_match);                             /* current match */

; gvmat64.asm -- Asm portion of the optimized longest_match for 64-bit x86 (AMD64 / x64)
; Copyright (C) 1995-2005 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
;
; File written by Gilles Vollant, by converting to assembly the longest_match
;  from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
;
;  and by taking inspiration from asm686 with masm, optimised assembly code
;        from Brian Raiter, written 1998
;
;         http://www.zlib.net
;         http://www.winimage.com/zLibDll
;         http://www.muppetlabs.com/~breadbox/software/assembly.html
;
; To compile this file for infozip Zip, I use the options:
;   ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm
;
; To compile this file for zLib, I use the options:
;   ml64.exe /Flgvmat64 /c /Zi gvmat64.asm
; Be careful to adapt zlib1222add below to your version of zLib
;   (the default value suits zLib 1.2.2.2 and later; for an older zLib,
;    change the value of zlib1222add as described further down)
;
; This file compiles with Microsoft Macro Assembler (x64) for AMD64
;
;   ml64.exe ships with Visual Studio 2005 and the Windows 2003 server DDK
;
;   (you can get the Windows 2003 server DDK with ml64 and cl for AMD64 from
;      http://www.microsoft.com/whdc/devtools/ddk/default.mspx for a low price)
;


;-----------------------------------------------------------------------
; uInt longest_match(deflate_state *s, IPos cur_match)
;
; Syntax: MASM (ml64).  ABI: Microsoft x64.
;
; Follows the hash chain that starts at cur_match and returns the length
; of the longest match found for the string at s->window + s->strstart.
; match_start is stored each time a longer match is found.
;
; In:   zlib build : rcx = s (deflate_state *), edx = cur_match
;       /DINFOZIP  : ecx = cur_match; the state is read from the COMM
;                    globals declared below instead of through rcx
; Out:  eax = best match length (zlib build: min(best_len, s->lookahead))
;
; Saved and restored here : rdi, rsi, rbx, rbp, r12, r13
; Used as scratch         : rax, rdx, r8, r9, r10, r11
; Not written             : rcx, r14, r15
;
; Register roles once setup is complete:
;   r8d  = cur_match                    r9   = scan
;   r10  = window                       r11d = best_len
;   r12w = scan_start                   r13  = scanalign = (-scan) & 3
;   ebx  = scan_end                     ebp  = limit
;   edx  = chainlenwmask                rsi  = window + best_len
;   rdi  = prev
;
; NOTE(review): the locals and register save slots below resolve to
; negative offsets from rsp (e.g. save_rdi = rsp - 48) and rsp itself is
; never adjusted.  The Microsoft x64 ABI defines no red zone, so this
; relies on nothing overwriting the memory just below rsp while the
; function runs -- confirm this is acceptable for the target environment.
;-----------------------------------------------------------------------
.code
longest_match PROC

LocalVarsSize   equ 72                  ; (88 if r14/r15 ever need saving)

; registers used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12,r13
; free registers : r14,r15

chainlenwmask   equ  rsp + 8 - LocalVarsSize    ; high word: current chain len
                                                ; low word: s->wmask
;window          -> kept in r10
;windowbestlen   -> kept in rsi (= r10 + r11)
;scanstart       -> kept in r12w
;scanend         -> kept in bx
;scanalign       -> kept in r13
;bestlen         -> kept in r11d
;scan            -> kept in r9
IFNDEF INFOZIP
nicematch       equ  (rsp + 16 - LocalVarsSize) ; a good enough match size
ENDIF

save_rdi        equ  rsp + 24 - LocalVarsSize
save_rsi        equ  rsp + 32 - LocalVarsSize
save_rbx        equ  rsp + 40 - LocalVarsSize
save_rbp        equ  rsp + 48 - LocalVarsSize
save_r12        equ  rsp + 56 - LocalVarsSize
save_r13        equ  rsp + 64 - LocalVarsSize
;save_r14        equ  rsp + 72 - LocalVarsSize
;save_r15        equ  rsp + 80 - LocalVarsSize


;  Note : the structure offsets below are good with a 8 bytes boundary
;  pack structure


    MAX_MATCH           equ     258
    MIN_MATCH           equ     3
    MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)


;;; Offsets for fields in the deflate_state structure. These numbers
;;; are calculated from the definition of deflate_state, with the
;;; assumption that the compiler will dword-align the fields. (Thus,
;;; changing the definition of deflate_state could easily cause this
;;; program to crash horribly, without so much as a warning at
;;; compile time. Sigh.)

;  All the +zlib1222add offsets are due to the addition of fields
;  in zlib in the deflate_state structure since the asm code was first written
;  (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
;  (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
;  (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").


IFDEF INFOZIP

_DATA   SEGMENT
COMM    window_size:DWORD
; WMask ; 7fff
COMM    window:BYTE:010040H
COMM    prev:WORD:08000H
; MatchLen : unused
; PrevMatch : unused
COMM    strstart:DWORD
COMM    match_start:DWORD
; Lookahead : ignore
COMM    prev_length:DWORD ; PrevLen
COMM    max_chain_length:DWORD
COMM    good_match:DWORD
COMM    nice_match:DWORD
prev_ad equ OFFSET prev
window_ad equ OFFSET window
nicematch equ nice_match
_DATA ENDS
WMask equ 07fffh

ELSE

  IFNDEF zlib1222add
    zlib1222add equ 8
  ENDIF
dsWSize         equ 56+zlib1222add+(zlib1222add/2)
dsWMask         equ 64+zlib1222add+(zlib1222add/2)
dsWindow        equ 72+zlib1222add
dsPrev          equ 88+zlib1222add
dsMatchLen      equ 128+zlib1222add
dsPrevMatch     equ 132+zlib1222add
dsStrStart      equ 140+zlib1222add
dsMatchStart    equ 144+zlib1222add
dsLookahead     equ 148+zlib1222add
dsPrevLen       equ 152+zlib1222add
dsMaxChainLen   equ 156+zlib1222add
dsGoodMatch     equ 172+zlib1222add
dsNiceMatch     equ 176+zlib1222add

; Text macros: every field access below goes through rcx (= s).
window_size     equ [ rcx + dsWSize]
WMask           equ [ rcx + dsWMask]
window_ad       equ [ rcx + dsWindow]
prev_ad         equ [ rcx + dsPrev]
strstart        equ [ rcx + dsStrStart]
match_start     equ [ rcx + dsMatchStart]
Lookahead       equ [ rcx + dsLookahead] ; 0ffffffffh on infozip
prev_length     equ [ rcx + dsPrevLen]
max_chain_length equ [ rcx + dsMaxChainLen]
good_match      equ [ rcx + dsGoodMatch]
nice_match      equ [ rcx + dsNiceMatch]
ENDIF

; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
;
; All registers must be preserved across the call, except for
;   rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.


;;; Save the non-scratch registers this function overwrites. rsp is left
;;; untouched; the save slots are addressed at fixed offsets below it.

;;; Retrieve the function arguments. r8d will hold cur_match
;;; throughout the entire function. In the zlib build rcx keeps the
;;; pointer to the deflate_state structure for the whole function.

; zlib build : parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8d
; INFOZIP    : parameter 1 in ecx (cur_match) -> r8d

; The 32-bit move clears the high 32 bits of r8; the high halves of the
; incoming argument registers can hold garbage.

        mov [save_rdi],rdi
        mov [save_rsi],rsi
        mov [save_rbx],rbx
        mov [save_rbp],rbp
IFDEF INFOZIP
        mov r8d,ecx
ELSE
        mov r8d,edx
ENDIF
        mov [save_r12],r12
        mov [save_r13],r13
;        mov [save_r14],r14
;        mov [save_r15],r15


;;; uInt wmask = s->w_mask;
;;; unsigned chain_length = s->max_chain_length;
;;; if (s->prev_length >= s->good_match) {
;;;     chain_length >>= 2;
;;; }

        mov edi, prev_length
        mov esi, good_match
        mov eax, WMask
        mov ebx, max_chain_length
        cmp edi, esi
        jl  LastMatchGood
        shr ebx, 2
LastMatchGood:

;;; chainlen is decremented once beforehand so that the function can
;;; use the sign flag instead of the zero flag for the exit test.
;;; It is then shifted into the high word, to make room for the wmask
;;; value, which it will always accompany.

        dec ebx
        shl ebx, 16
        or  ebx, eax

;;; on zlib only
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

IFDEF INFOZIP
        mov [chainlenwmask], ebx
; on infozip nice_match = [nice_match]
ELSE
        mov eax, nice_match
        mov [chainlenwmask], ebx
        mov r10d, Lookahead
        cmp r10d, eax
        cmovnl r10d, eax                ; r10d = min(lookahead, nice_match)
        mov [nicematch],r10d
ENDIF

;;; register Bytef *scan = s->window + s->strstart;
        mov r10, window_ad
        mov ebp, strstart
        lea r13, [r10 + rbp]

;;; Determine how many bytes the scan ptr is off from being
;;; dword-aligned.

         mov r9,r13                     ; r9 = scan
         neg r13
         and r13,3                      ; r13 = (-scan) & 3

;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
IFDEF INFOZIP
        mov eax,07efah ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(258+3+1))
ELSE
        mov eax, window_size
        sub eax, MIN_LOOKAHEAD
ENDIF
        xor edi,edi
        sub ebp, eax

;;; int best_len = s->prev_length;
;;; (mov does not alter the flags, so the cmovng below still tests the
;;; result of the sub above)

        mov r11d, prev_length

        cmovng ebp,edi                  ; limit = 0 when strstart <= MAX_DIST


;;; Keep the sum s->window + best_len in rsi.

       lea  rsi,[r10+r11]

;;; register ush scan_start = *(ushf*)scan;
;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
;;; Posf *prev = s->prev;

        movzx r12d,word ptr [r9]
        movzx ebx, word ptr [r9 + r11 - 1]

        mov rdi, prev_ad

;;; Jump into the main loop.

        mov edx, [chainlenwmask]

        cmp bx,word ptr [rsi + r8 - 1]
        jz  LookupLoopIsZero

;;; The lookup step is unrolled three times (LookupLoop1/2/4); each copy
;;; is identical to LookupLoop further down.

LookupLoop1:
        and r8d, edx                    ; only the low word (wmask) can survive:
                                        ; the prev[] entries loaded into r8d are 16-bit

        movzx   r8d, word ptr [rdi + r8*2]
        cmp r8d, ebp
        jbe LeaveNow
        sub edx, 00010000h              ; --chain_length (kept in the high word)
        js  LeaveNow

LoopEntry1:
        cmp bx,word ptr [rsi + r8 - 1]
        jz  LookupLoopIsZero

LookupLoop2:
        and r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp r8d, ebp
        jbe LeaveNow
        sub edx, 00010000h
        js  LeaveNow

LoopEntry2:
        cmp bx,word ptr [rsi + r8 - 1]
        jz  LookupLoopIsZero

LookupLoop4:
        and r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp r8d, ebp
        jbe LeaveNow
        sub edx, 00010000h
        js  LeaveNow

LoopEntry4:

        cmp bx,word ptr [rsi + r8 - 1]
        jnz LookupLoop1
        jmp LookupLoopIsZero


;;; do {
;;;     match = s->window + cur_match;
;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
;;;         *(ushf*)match != scan_start) continue;
;;;     [...]
;;; } while ((cur_match = prev[cur_match & wmask]) > limit
;;;          && --chain_length != 0);
;;;
;;; Here is the inner loop of the function. The function will spend the
;;; majority of its time in this loop, and majority of that time will
;;; be spent in the first ten instructions.
;;;
;;; Within this loop:
;;; ebx = scanend
;;; r8d = curmatch
;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
;;; rsi = windowbestlen - i.e., (window + bestlen)
;;; rdi = prev
;;; ebp = limit

LookupLoop:
        and r8d, edx

        movzx   r8d, word ptr [rdi + r8*2]
        cmp r8d, ebp
        jbe LeaveNow
        sub edx, 00010000h
        js  LeaveNow

LoopEntry:

        cmp bx,word ptr [rsi + r8 - 1]
        jnz LookupLoop1
LookupLoopIsZero:
        cmp     r12w, word ptr [r10 + r8]
        jnz LookupLoop1


;;; Store the current value of chainlen.
        mov [chainlenwmask], edx

;;; Point rdi to the string under scrutiny, and rsi to the string we
;;; are hoping to match it up with. In actuality, rsi and rdi are
;;; both pointed (scanalign + MAX_MATCH_8) bytes ahead, and rdx is
;;; initialized to -MAX_MATCH_8, where MAX_MATCH_8 = 0108h (264).

        lea rsi,[r8+r10]
        mov rdx, 0fffffffffffffef8h; -(MAX_MATCH_8)
        lea rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8]
        lea rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8]

        prefetcht1 [rsi+rdx]
        prefetcht1 [rdi+rdx]


;;; Test the strings for equality, 8 bytes at a time. At the end,
;;; adjust rdx so that it is offset to the exact byte that mismatched.
;;;
;;; We already know at this point that the first three bytes of the
;;; strings match each other, and they can be safely passed over before
;;; starting the compare loop. So what this code does is skip over 0-3
;;; bytes, as much as necessary in order to dword-align the rdi
;;; pointer. (rsi will still be misaligned three times out of four.)
;;;
;;; It should be confessed that this loop usually does not represent
;;; much of the total running time. Replacing it with a more
;;; straightforward "rep cmpsb" would not drastically degrade
;;; performance.


LoopCmps:
        mov rax, [rsi + rdx]
        xor rax, [rdi + rdx]            ; non-zero bits mark differing bytes
        jnz LeaveLoopCmps

        mov rax, [rsi + rdx + 8]
        xor rax, [rdi + rdx + 8]
        jnz LeaveLoopCmps8


        mov rax, [rsi + rdx + 8+8]
        xor rax, [rdi + rdx + 8+8]
        jnz LeaveLoopCmps16

        add rdx,8+8+8

;;; rdx climbs from -0108h in steps of 24 and therefore reaches exactly 0
;;; after 11 rounds. Stop there: every byte of the compared span matched,
;;; so take the maximum length instead of reading past the span.
        jnz LoopCmps
        jmp LenMaximum

LeaveLoopCmps16: add rdx,8
LeaveLoopCmps8: add rdx,8
LeaveLoopCmps:

;;; rax holds the xor of the two qwords; find its lowest non-zero byte
;;; and advance rdx by that byte's index (0..7).

        test    eax, 0000FFFFh
        jnz LenLower                    ; difference in byte 0 or 1

        test eax,0ffffffffh

        jnz LenLower32                  ; difference in byte 2 or 3

        add rdx,4
        shr rax,32
        or ax,ax
        jnz LenLower                    ; difference in byte 4 or 5

LenLower32:
        shr eax,16
        add rdx,2
LenLower:   sub al, 1                   ; carry set only when al was 0, i.e. the
        adc rdx, 0                      ; low byte matched: skip one more byte

;;; Calculate the length of the match. If it is longer than MAX_MATCH,
;;; then automatically accept it as the best possible match and leave.

        lea rax, [rdi + rdx]
        sub rax, r9
        cmp eax, MAX_MATCH
        jge LenMaximum

;;; If the length of the match is not longer than the best match we
;;; have so far, then forget it and return to the lookup loop.
;///////////////////////////////////

        cmp eax, r11d
        jg  LongerMatch

        lea rsi,[r10+r11]

        mov rdi, prev_ad
        mov edx, [chainlenwmask]
        jmp LookupLoop

;;;         s->match_start = cur_match;
;;;         best_len = len;
;;;         if (len >= nice_match) break;
;;;         scan_end = *(ushf*)(scan+best_len-1);

LongerMatch:
        mov r11d, eax
        mov match_start, r8d
        cmp eax, [nicematch]
        jge LeaveNow

        lea rsi,[r10+rax]

        movzx   ebx, word ptr [r9 + rax - 1]
        mov rdi, prev_ad
        mov edx, [chainlenwmask]
        jmp LookupLoop

;;; Accept the current string, with the maximum possible length.

LenMaximum:
        mov r11d,MAX_MATCH
        mov match_start, r8d

;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
;;; return s->lookahead;

LeaveNow:
IFDEF INFOZIP
        mov eax,r11d
ELSE
        mov eax, Lookahead
        cmp r11d, eax
        cmovng eax, r11d
ENDIF

;;; Restore the saved registers and return from whence we came.


        mov rsi,[save_rsi]
        mov rdi,[save_rdi]
        mov rbx,[save_rbx]
        mov rbp,[save_rbp]
        mov r12,[save_r12]
        mov r13,[save_r13]
;        mov r14,[save_r14]
;        mov r15,[save_r15]


        ret 0
; please don't remove this string !
; Your can freely use gvmat64 in any free or commercial app
; but it is far better don't remove the string in the binary!
    db     0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0
longest_match   ENDP
;-----------------------------------------------------------------------
; match_init -- performs no work: the procedure consists of a single
; return instruction and hands control straight back to its caller.
match_init      PROC
        ret     0
match_init      ENDP
; End of the module.

END
