1#!/usr/local/bin/perl
2
# Take the directory part of the script path (everything up to and including
# the last / or \) and make both that directory and its ../../perlasm
# sibling searchable, so that x86asm.pl can be required from either place.
$0 =~ m{(.*[/\\])[^/\\]+$};
$dir = $1;
push(@INC, "${dir}", "${dir}../../perlasm");
require "x86asm.pl";

# asm_init/asm_finish are provided by x86asm.pl; asm_init receives the first
# command-line argument and the script name.
&asm_init($ARGV[0], $0);

# Generate the 8-word and then the 4-word variant of each routine,
# multiplication first and squaring second.
foreach my $words (8, 4)
	{ &bn_mul_comba("bn_mul_comba$words", $words); }
foreach my $words (8, 4)
	{ &bn_sqr_comba("bn_sqr_comba$words", $words); }

&asm_finish();
15
sub mul_add_c
	{
	# Emit one multiply-accumulate step of a comba column:
	# the product of the words already sitting in eax and edx is added
	# into the three-register accumulator (lo, mid, hi).
	#
	# Arguments, in order:
	#   ap, ai   register holding the a[] pointer / index of the a word
	#   bp, bj   register holding the b[] pointer / index of the b word
	#   lo, mid, hi
	#            registers forming the accumulator, least significant first
	#   mode     0  - more products follow: preload a[next_a] and b[next_b]
	#            1  - store lo to r[word], then preload a[next_a], b[next_b]
	#            >1 - store lo to r[word] only (nothing is preloaded)
	#   word     index of the result word written when mode > 0
	#   next_a, next_b
	#            indices of the operand words to preload
	#
	# The emitter helpers keep their & call form on purpose: several of
	# them (push, pop, xor) share names with Perl built-ins elsewhere in
	# this file, and the same form is used here for uniformity.
	my($ap,$ai,$bp,$bj,$lo,$mid,$hi,$mode,$word,$next_a,$next_b)=@_;

	&comment("mul a[$ai]*b[$bj]");

	# eax and edx are expected to hold a[ai] and b[bj] on entry.
	&mul("edx");
	&add($lo,"eax");
	if ($mode == 0)
		{
		# eax is free again: fetch the next a word
		&mov("eax",&DWP($next_a*4,$ap,"",0));
		}
	elsif ($mode > 0)
		{
		# eax is free again: fetch the r[] pointer (first parameter)
		&mov("eax",&wparam(0));
		}

	&adc($mid,"edx");
	if ($mode == 0 || $mode == 1)
		{
		# edx is free again: fetch the next b word
		&mov("edx",&DWP($next_b*4,$bp,"",0));
		}

	&adc($hi,0);
	if ($mode > 0)
		{
		# eax holds the r[] pointer here: store the finished word
		&mov(&DWP($word*4,"eax","",0),$lo);
		}
	if ($mode == 1)
		{
		# eax was used for the r[] pointer, so the next a word is
		# loaded only now
		&mov("eax",&DWP($next_a*4,$ap,"",0));
		}
	}
43
sub sqr_add_c
	{
	# Emit one accumulate step of a squaring column in which the product
	# is added once (not doubled) into the accumulator (lo, mid, hi).
	#
	# Arguments, in order:
	#   rp       register holding the r[] pointer
	#   ap       register holding the a[] pointer
	#   ai, aj   indices of the two a words being multiplied
	#   lo, mid, hi
	#            registers forming the accumulator, least significant first
	#   mode     0  - more products follow: preload a[next_a] into eax
	#            1  - store lo to r[word], preload a[next_a] into eax and,
	#                 when next_a differs from next_b, a[next_b] into edx
	#            >1 - store lo to r[word] only
	#   word     index of the result word written when mode > 0
	#   next_a, next_b
	#            indices of the operand words to preload
	my($rp,$ap,$ai,$aj,$lo,$mid,$hi,$mode,$word,$next_a,$next_b)=@_;

	&comment("sqr a[$ai]*a[$aj]");

	# eax (and edx when the indices differ) are expected to be loaded on
	# entry; a true square multiplies eax by itself.
	&mul(($ai == $aj) ? "eax" : "edx");
	&add($lo,"eax");
	if ($mode == 0)
		{
		&mov("eax",&DWP($next_a*4,$ap,"",0));	# next first operand
		}

	&adc($mid,"edx");
	if ($mode == 1 && $next_a != $next_b)
		{
		&mov("edx",&DWP($next_b*4,$ap,"",0));	# next second operand
		}

	&adc($hi,0);
	if ($mode > 0)
		{
		&mov(&DWP($word*4,$rp,"",0),$lo);	# store r[word]
		}
	if ($mode == 1)
		{
		&mov("eax",&DWP($next_a*4,$ap,"",0));	# next first operand
		}
	}
72
sub sqr_add_c2
	{
	# Emit one accumulate step of a squaring column in which the product
	# is doubled before being added into the accumulator (lo, mid, hi).
	#
	# Arguments, in order:
	#   rp       register holding the r[] pointer
	#   ap       register holding the a[] pointer
	#   ai, aj   indices of the two a words being multiplied
	#   lo, mid, hi
	#            registers forming the accumulator, least significant first
	#   mode     0  - more products follow
	#            1  - store lo to r[word]
	#            >1 - store lo to r[word]; nothing is preloaded
	#            For mode 0 and 1, a[next_a] is preloaded into eax and,
	#            when next_a differs from next_b, a[next_b] into edx.
	#   word     index of the result word written when mode > 0
	#   next_a, next_b
	#            indices of the operand words to preload
	my($rp,$ap,$ai,$aj,$lo,$mid,$hi,$mode,$word,$next_a,$next_b)=@_;

	&comment("sqr a[$ai]*a[$aj]");

	# eax and edx are expected to hold the operands on entry.
	&mul(($ai == $aj) ? "eax" : "edx");

	# Double the edx:eax product; the bit shifted out goes into hi.
	&add("eax","eax");
	&adc("edx","edx");
	&adc($hi,0);

	# Accumulate the doubled product.
	&add($lo,"eax");
	&adc($mid,"edx");
	if ($mode == 0 || $mode == 1)
		{
		&mov("eax",&DWP($next_a*4,$ap,"",0));	# next first operand
		}
	&adc($hi,0);

	if ($mode > 0)
		{
		&mov(&DWP($word*4,$rp,"",0),$lo);	# store r[word]
		}
	if ($mode <= 1 && $next_a != $next_b)
		{
		&mov("edx",&DWP($next_b*4,$ap,"",0));	# next second operand
		}
	}
104
sub bn_mul_comba
	{
	# Emit the function $name, which multiplies two $num-word operands
	# column by column: for every result word all products a[ai]*b[bi]
	# with ai+bi equal to the word index are accumulated via mul_add_c,
	# then the word is stored.
	#
	# The generated function reads its operands through wparam(1) (a[])
	# and wparam(2) (b[]); mul_add_c fetches wparam(0) (r[]) when it
	# stores a word.
	#
	# All working state is lexical.  The column bookkeeping ($j, $v, $na,
	# $nb) used to be written to undeclared package globals, and a
	# counter that was never read has been dropped; the emitted
	# instruction sequence is unchanged.
	#
	# Emitters keep the & call form because push, pop and xor would
	# otherwise be parsed as Perl built-ins.
	my($name,$num)=@_;

	# Accumulator registers; their roles rotate after every stored word.
	my($c0,$c1,$c2)=("ebx","ecx","ebp");
	my $ap="esi";			# a[] pointer
	my $bp="edi";			# b[] pointer

	my $tot=$num+$num-1;		# number of columns handled by the loop
	my $as=0;			# a index of the first product in a column
	my $bs=0;			# b index of the first product in a column
	my $be=0;			# b index of the last product in a column
	my $i;				# result word index; read after the loop

	&function_begin_B($name,"");

	# Pushes and parameter loads stay interleaved exactly as before.
	# NOTE(review): wparam() offsets may depend on the pushes emitted so
	# far (handled inside x86asm.pl) - do not reorder.
	&push("esi");
	 &mov($ap,&wparam(1));
	&push("edi");
	 &mov($bp,&wparam(2));
	&push("ebp");
	 &push("ebx");

	&xor($c0,$c0);
	 &mov("eax",&DWP(0,$ap,"",0));	# preload a[0]
	&xor($c1,$c1);
	 &mov("edx",&DWP(0,$bp,"",0));	# preload b[0]

	for ($i=0; $i<$tot; $i++)
		{
		# 1 while columns are still getting longer, 0 afterwards
		my $rising=($i < ($num-1)) ? 1 : 0;
		my $ai=$as;
		my $bi=$bs;

		&comment("################## Calculate word $i");

		for (my $j=$bs; $j<=$be; $j++)
			{
			my($v,$na,$nb);

			# hi word of the accumulator starts each column at 0
			&xor($c2,$c2) if ($j == $bs);

			if ($j == $be)
				{
				# Last product of this column: store the word
				# and preload the first operands of the next
				# column (v == 2 on the very last column,
				# where nothing is preloaded).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+$rising;
				$nb=$bs+(1-$rising);
				}
			else
				{
				# Walk along the column: a index down, b up.
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			&mul_add_c($ap,$ai,$bp,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($v)
				{
				&comment("saved r[$i]");
				# c0 has been written out; rotate the roles
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		# Advance the column bounds for the next result word.
		if ($rising)
			{
			$as++;
			$be++;
			}
		else
			{
			$bs++;
			}
		}

	# Store the remaining top word.  eax still holds the r[] pointer that
	# the final mul_add_c call loaded from wparam(0).
	&comment("save r[$i]");
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
195
sub bn_sqr_comba
	{
	# Emit the function $name, which squares a $num-word operand column
	# by column.  Within a column only the products with ai >= bi are
	# generated: a diagonal product (ai == bi) goes through sqr_add_c,
	# every other one through sqr_add_c2.
	#
	# The generated function reads r[] through wparam(0) and a[] through
	# wparam(1).
	#
	# All working state is lexical.  The register names used to be seeded
	# from @_ by mistake before being overwritten, unused variables were
	# declared, and the column bookkeeping ($j, $v, $na, $nb) was written
	# to undeclared package globals; the emitted instruction sequence is
	# unchanged.
	#
	# Emitters keep the & call form because push, pop and xor would
	# otherwise be parsed as Perl built-ins.
	my($name,$num)=@_;

	# Accumulator registers; their roles rotate after every stored word.
	my($c0,$c1,$c2)=("ebx","ecx","ebp");
	my $ap="esi";			# a[] pointer
	my $rp="edi";			# r[] pointer

	my $tot=$num+$num-1;		# number of columns handled by the loop
	my $as=0;			# first (higher) a index of a column
	my $bs=0;			# second (lower) a index of a column
	my $be=0;			# upper bound for the second index
	my $i;				# result word index; read after the loop

	&function_begin_B($name,"");

	# NOTE(review): wparam() offsets may depend on the pushes emitted so
	# far (handled inside x86asm.pl) - do not reorder.
	&push("esi");
	 &push("edi");
	&push("ebp");
	 &push("ebx");
	&mov($rp,&wparam(0));
	 &mov($ap,&wparam(1));
	&xor($c0,$c0);
	 &xor($c1,$c1);
	&mov("eax",&DWP(0,$ap,"",0));	# preload a[0]

	for ($i=0; $i<$tot; $i++)
		{
		# 1 while columns are still getting longer, 0 afterwards
		my $rising=($i < ($num-1)) ? 1 : 0;
		my $ai=$as;
		my $bi=$bs;

		&comment("############### Calculate word $i");
		for (my $j=$bs; $j<=$be; $j++)
			{
			my($v,$na,$nb);

			# hi word of the accumulator starts each column at 0
			&xor($c2,$c2) if ($j == $bs);

			if (($ai-1) < ($bi+1))
				{
				# The indices would cross on the next step, so
				# this is the last product of the column:
				# store the word and preload for the next
				# column (v == 2 on the very last column).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+$rising;
				$nb=$bs+(1-$rising);
				}
			else
				{
				# Walk along the column: first index down,
				# second index up.
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			if ($ai == $bi)
				{
				&sqr_add_c($rp,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($rp,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}

			if ($v)
				{
				&comment("saved r[$i]");
				# c0 has been written out; rotate the roles
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}

		# Advance the column bounds for the next result word.
		if ($rising)
			{
			$as++;
			$be++;
			}
		else
			{
			$bs++;
			}
		}

	# Store the remaining top word.
	&mov(&DWP($i*4,$rp,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
288