1;; libgcc routines for the Renesas H8/300 CPU.
2;; Contributed by Steve Chamberlain <sac@cygnus.com>
3;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
4
5/* Copyright (C) 1994-2015 Free Software Foundation, Inc.
6
7This file is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9Free Software Foundation; either version 3, or (at your option) any
10later version.
11
12This file is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15General Public License for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
/* Assembler register definitions.

   A0-A3 are aliases for r0-r3 and S0-S2 are aliases for r4-r6.  An
   H or L suffix names the high or low byte of the same register.  */

#define A0	r0
#define A0H	r0h
#define A0L	r0l

#define A1	r1
#define A1H	r1h
#define A1L	r1l

#define A2	r2
#define A2H	r2h
#define A2L	r2l

#define A3	r3
#define A3H	r3h
#define A3L	r3l

#define S0	r4
#define S0H	r4h
#define S0L	r4l

#define S1	r5
#define S1H	r5h
#define S1L	r5l

#define S2	r6
#define S2H	r6h
#define S2L	r6l

/* PUSHP/POPP and the P-suffixed names pick the register view that is
   saved and restored on the stack.  On the H8/300 that is the plain
   rN register with push/pop.  */

#ifdef __H8300__
#define PUSHP	push
#define POPP	pop

#define A0P	r0
#define A1P	r1
#define A2P	r2
#define A3P	r3
#define S0P	r4
#define S1P	r5
#define S2P	r6
#endif

/* On the H8/300H, H8S and H8SX the P-suffixed names are the erN
   registers, saved with push.l/pop.l.  A0E-A3E name e0-e3, the top
   words of er0-er3.  */

#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define PUSHP	push.l
#define POPP	pop.l

#define A0P	er0
#define A1P	er1
#define A2P	er2
#define A3P	er3
#define S0P	er4
#define S1P	er5
#define S2P	er6

#define A0E	e0
#define A1E	e1
#define A2E	e2
#define A3E	e3
#endif
86
/* Tell the assembler which CPU variant is being targeted.  For each
   variant, __NORMAL_MODE__ selects the directive with the trailing n.  */

#if defined (__H8300H__) && defined (__NORMAL_MODE__)
	.h8300hn
#elif defined (__H8300H__)
	.h8300h
#endif

#if defined (__H8300S__) && defined (__NORMAL_MODE__)
	.h8300sn
#elif defined (__H8300S__)
	.h8300s
#endif

#if defined (__H8300SX__) && defined (__NORMAL_MODE__)
	.h8300sxn
#elif defined (__H8300SX__)
	.h8300sx
#endif
109
#ifdef L_cmpsi2
#ifdef __H8300__
;; ___cmpsi2: signed three-way compare of the SImode values A0:A1 and
;; A2:A3 (high word in A0 and A2, low word in A1 and A3).
;;
;; Result in A0:
;;   0  when A0:A1 is less than A2:A3
;;   1  when the two values are equal
;;   2  when A0:A1 is greater than A2:A3
;;
;; The high words are compared as signed quantities.  The low words
;; are only looked at when the high words match, and are compared as
;; unsigned quantities.
	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2			; flags from A2 - A0
	bne	.Lcmpsi2_high_differs
	cmp.w	A1,A3			; flags from A3 - A1
	bne	.Lcmpsi2_low_differs
	mov.w	#1,A0			; every word matched
	rts
.Lcmpsi2_high_differs:
	bgt	.Lcmpsi2_less		; A2 > A0, signed
.Lcmpsi2_greater:
	mov.w	#2,A0
	rts
.Lcmpsi2_low_differs:
	bls	.Lcmpsi2_greater	; A3 below A1, unsigned
.Lcmpsi2_less:
	sub.w	A0,A0			; result is zero
	rts
	.end
#endif
#endif /* L_cmpsi2 */
135
#ifdef L_ucmpsi2
#ifdef __H8300__
;; ___ucmpsi2: unsigned three-way compare of the SImode values A0:A1
;; and A2:A3 (high word in A0 and A2, low word in A1 and A3).
;;
;; Result in A0:
;;   0  when A0:A1 is below A2:A3
;;   1  when the two values are equal
;;   2  when A0:A1 is above A2:A3
;;
;; Both the high and the low words are compared as unsigned
;; quantities; the low words only matter when the high words match.
	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2			; flags from A2 - A0
	bne	.Lucmpsi2_high_differs
	cmp.w	A1,A3			; flags from A3 - A1
	bne	.Lucmpsi2_low_differs
	mov.w	#1,A0			; every word matched
	rts
.Lucmpsi2_high_differs:
	bhi	.Lucmpsi2_below		; A2 above A0, unsigned
.Lucmpsi2_above:
	mov.w	#2,A0
	rts
.Lucmpsi2_low_differs:
	bls	.Lucmpsi2_above		; A3 below A1, unsigned
.Lucmpsi2_below:
	sub.w	A0,A0			; result is zero
	rts
	.end
#endif
#endif /* L_ucmpsi2 */
161
162#ifdef L_divhi3
163
164;; HImode divides for the H8/300.
165;; We bunch all of this into one object file since there are several
166;; "supporting routines".
167
; divnorm: normalize the operands of a signed HImode divide.
;
; in:	A0 = dividend, A1 = divisor
; out:	A0 and A1 negated if they were negative
;	A2L = copy of CCR taken after testing the dividend; bit 3 of
;	      it is toggled when the divisor is negative, so it ends up
;	      set exactly when the quotient has to be negative

#ifdef __H8300__
	.section .text
	.align 2
divnorm:
	or	A0H,A0H		; flags from the sign of the dividend
	stc	ccr,A2L		; keep them for negans
	bge	.Ldivnorm_divisor
	not	A0H		; dividend is negative: A0 = -A0
	not	A0L
	adds	#1,A0
.Ldivnorm_divisor:
	or	A1H,A1H		; flags from the sign of the divisor
	bge	.Ldivnorm_done
	not	A1H		; divisor is negative: A1 = -A1
	not	A1L
	adds	#1,A1
	xor	#0x8,A2L	; and flip the sign recorded in bit 3
.Ldivnorm_done:
	rts
;; modnorm: same as divnorm, except that the divisor does not toggle
;; the recorded sign.  Bit 3 of A2L therefore reflects the sign of the
;; dividend (A0) alone, which is the sign the remainder gets.
modnorm:
	or	A0H,A0H		; flags from the sign of the dividend
	stc	ccr,A2L		; keep them for negans
	bge	.Lmodnorm_divisor
	not	A0H		; dividend is negative: A0 = -A0
	not	A0L
	adds	#1,A0
.Lmodnorm_divisor:
	or	A1H,A1H		; flags from the sign of the divisor
	bge	.Lmodnorm_done
	not	A1H		; divisor is negative: A1 = -A1
	not	A1L
	adds	#1,A1
.Lmodnorm_done:
	rts
207
; ___divhi3: A0 = A0 / A1, signed.
;
; The operands are made non-negative, divided as unsigned values, and
; the quotient is negated afterwards if bit 3 of A2L says so.

	.global	___divhi3
___divhi3:
	bsr	divnorm		; strip the signs, result sign in A2L
	bsr	___udivhi3	; A0 = quotient
	; fall through into negans

; negans: negate A0 when bit 3 of A2L is set, then return.
; ___modhi3 branches here as well.
negans:
	btst	#3,A2L
	beq	.Lnegans_done
	not	A0H		; A0 = -A0
	not	A0L
	adds	#1,A0
.Lnegans_done:
	rts
220
; ___modhi3: A0 = A0 % A1, signed.
;
; ___udivhi3 leaves the remainder in A3; negans then applies the sign
; that modnorm stored in A2L.

	.global	___modhi3
___modhi3:
	bsr	modnorm		; strip the signs, remainder sign in A2L
	bsr	___udivhi3
	mov	A3,A0		; return the remainder, not the quotient
	bra	negans
229
; ___umodhi3: A0 = A0 % A1, unsigned.
;
; Thin wrapper: ___udivhi3 leaves the remainder in A3.

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0		; return the remainder
	rts
237
; ___udivhi3: unsigned HImode divide.
;
; in:	A0 = dividend, A1 = divisor
; out:	A0 = A0 / A1
;	A3 = A0 % A1
; A2H is trashed.  A2L is not written, so the sign left there by
; divnorm or modnorm is still available afterwards.
;
; The H8/300 only has a 16/8 bit divide, so the work is split on the
; size of the divisor:
;   - high byte of the divisor zero: long division with divxu, one
;     dividend byte per step;
;   - otherwise: an eight step shift-and-subtract loop.

	.global	___udivhi3
___udivhi3:
	sub.w	A3,A3		; A3 = 0
	or	A1H,A1H
	bne	.Ludivhi3_long	; divisor needs more than 8 bits
	or	A0H,A0H
	beq	.Ludivhi3_low	; high dividend byte is zero, skip a step

	; Divisor fits in a byte and the high dividend byte is non-zero.
	mov.b	A0H,A3L		; A3 = 00 : high dividend byte
	divxu	A1L,A3		; A3H = remainder, A3L = quotient
	mov.b	A3L,A0H		; high byte of the quotient
.Ludivhi3_low:
	mov.b	A0L,A3L		; A3 = carried remainder : low dividend byte
	divxu	A1L,A3		; A3H = remainder, A3L = quotient
	mov.b	A3L,A0L		; low byte of the quotient
	mov.b	A3H,A3L		; move the remainder down ...
	mov.b	#0x0,A3H	; ... and clear the byte above it
	rts

; Divisor has a non-zero high byte, so divxu cannot be used.  The
; quotient then fits in eight bits; build it one bit at a time.

.Ludivhi3_long:
	mov.b	A0H,A3L		; remainder starts as the high dividend byte
	mov.b	#0x0,A0H	; high byte of the quotient has to be zero
	mov.b	#0x8,A2H	; eight quotient bits to produce
.Ludivhi3_loop:
	add.b	A0L,A0L		; shift the next dividend bit out into carry
	rotxl	A3L		; and shift it into the remainder
	rotxl	A3H
	sub.w	A1,A3		; remainder -= divisor
	bhs	.Ludivhi3_setbit ; it fitted: this quotient bit is one
	add.w	A1,A3		; too far: remainder += divisor

	dec	A2H
	bne	.Ludivhi3_loop	; next bit
	rts

.Ludivhi3_setbit:
	inc	A0L		; insert the quotient bit
	dec	A2H
	bne	.Ludivhi3_loop	; next bit
	rts
298
299#endif /* __H8300__ */
300#endif /* L_divhi3 */
301
302#ifdef L_divsi3
303
304;; 4 byte integer divides for the H8/300.
305;;
306;; We have one routine which does all the work and lots of
307;; little ones which prepare the args and massage the sign.
308;; We bunch all of this into one object file since there are several
309;; "supporting routines".
310
311	.section .text
312	.align 2
313
; Put abs SIs into r0/r1 and r2/r3 (er0 and er1 on the H8/300H and later),
; and leave the sign of the result in bit 3 of r6l.
; This function is here to keep branch displacements small.
316
317#ifdef __H8300__
318
;; divnorm (H8/300): make both operands of a signed SImode divide
;; non-negative.
;;
;; in:	A0:A1 = numerator, A2:A3 = denominator
;; out:	each operand negated if it was negative
;;	S2L = copy of CCR taken after testing the numerator; bit 3
;;	      is toggled when the denominator is negative
divnorm:
	mov.b	A0H,A0H		; flags from the sign of the numerator
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	.Ldivnorm_den

	; numerator = -numerator: invert all four bytes, then add one
	; to the low byte and ripple the carry upwards
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add.b	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
.Ldivnorm_den:
	mov.b	A2H,A2H		; flags from the sign of the denominator
	bge	.Ldivnorm_ret

	; denominator = -denominator, same recipe
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L	; toggle the result sign
.Ldivnorm_ret:
	rts
348
;; modnorm (H8/300): same as divnorm, except that a negative
;; denominator does not toggle the recorded sign.  Bit 3 of S2L
;; therefore reflects the sign of the numerator (A0:A1) alone.
modnorm:
	mov.b	A0H,A0H		; flags from the sign of the numerator
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	.Lmodnorm_den

	; numerator = -numerator: invert all four bytes, then add one
	; to the low byte and ripple the carry upwards
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add.b	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
.Lmodnorm_den:
	mov.b	A2H,A2H		; flags from the sign of the denominator
	bge	.Lmodnorm_ret

	; denominator = -denominator, same recipe
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
.Lmodnorm_ret:
	rts
379
380#else /* __H8300H__ */
381
;; divnorm (H8/300H and later): make both operands of a signed SImode
;; divide non-negative.
;;
;; in:	A0P = numerator, A1P = denominator
;; out:	each operand negated if it was negative
;;	S2L = copy of CCR taken after testing the numerator; bit 3
;;	      is toggled when the denominator is negative
divnorm:
	mov.l	A0P,A0P		; flags from the sign of the numerator
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	.Ldivnorm_check_den

	neg.l	A0P		; numerator = -numerator

.Ldivnorm_check_den:
	mov.l	A1P,A1P		; flags from the sign of the denominator
	bge	.Ldivnorm_out

	neg.l	A1P		; denominator = -denominator
	xor.b	#0x08,S2L	; toggle the result sign

.Ldivnorm_out:
	rts
398
;; modnorm (H8/300H and later): same as divnorm, except that a
;; negative denominator does not toggle the recorded sign.  Bit 3 of
;; S2L therefore reflects the sign of the numerator (A0P) alone.
modnorm:
	mov.l	A0P,A0P		; flags from the sign of the numerator
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	.Lmodnorm_check_den

	neg.l	A0P		; numerator = -numerator

.Lmodnorm_check_den:
	mov.l	A1P,A1P		; flags from the sign of the denominator
	bge	.Lmodnorm_out

	neg.l	A1P		; denominator = -denominator

.Lmodnorm_out:
	rts
416
417#endif
418
; ___modsi3: signed SImode remainder.
;
; H8/300:  numerator in A0/A1, denominator in A2/A3, result in A0/A1.
; others:  numerator in er0, denominator in er1, result in er0.
;
; The registers pushed here are popped again at reti, which is reached
; through exitdiv.
	.global	___modsi3
___modsi3:
	PUSHP	S2P		; S2L carries the sign to exitdiv
#ifdef __H8300__
	PUSHP	S0P		; divmodsi4 returns the remainder in S0/S1
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0		; return the remainder
	mov	S1,A1
#else
	bsr	modnorm
	bsr	___udivsi3
	mov.l	er3,er0		; ___udivsi3 leaves the remainder in er3
#endif
	bra	exitdiv		; fix up the sign and return
439
440	;; H8/300H and H8S version of ___udivsi3 is defined later in
441	;; the file.
#ifdef __H8300__
	;; ___udivsi3 (H8/300): unsigned SImode divide.
	;; Numerator in A0/A1, denominator in A2/A3, quotient in A0/A1.
	;; The three registers pushed here are popped again at reti.
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4	; quotient is already in A0/A1
	bra	reti
#endif
451
452	.global	___umodsi3
453___umodsi3:
454#ifdef __H8300__
455	PUSHP	S2P
456	PUSHP	S0P
457	PUSHP	S1P
458	bsr	divmodsi4
459	mov	S0,A0
460	mov	S1,A1
461	bra	reti
462#else
463	bsr	___udivsi3
464	mov.l	er3,er0
465	rts
466#endif
467
468	.global	___divsi3
469___divsi3:
470#ifdef __H8300__
471	PUSHP	S2P
472	PUSHP	S0P
473	PUSHP	S1P
474	jsr	divnorm
475	jsr	divmodsi4
476#else
477	PUSHP	S2P
478	jsr	divnorm
479	bsr	___udivsi3
480#endif
481
482	; examine what the sign should be
483exitdiv:
484	btst	#3,S2L
485	beq	reti
486
487	; should be -ve
488#ifdef __H8300__
489	not	A0H
490	not	A1H
491	not	A0L
492	not	A1L
493
494	add	#1,A1L
495	addx	#0,A1H
496	addx	#0,A0L
497	addx	#0,A0H
498#else /* __H8300H__ */
499	neg.l	A0P
500#endif
501
502reti:
503#ifdef __H8300__
504	POPP	S1P
505	POPP	S0P
506#endif
507	POPP	S2P
508	rts
509
	; takes A0/A1 numerator (A0P for H8/300H)
	; A2/A3 denominator (A1P for H8/300H)
	; returns A0/A1 quotient (A0P for H8/300H)
	; S0/S1 remainder (er3 for H8/300H)
	; trashes S2H (er2 for H8/300H)
515
516#ifdef __H8300__
517
; divmodsi4 (H8/300): unsigned SImode divide and remainder.
;
; in:	A0/A1 = numerator, A2/A3 = denominator
; out:	A0/A1 = quotient, S0/S1 = remainder
; S2H is trashed; S2L is not written.
;
; A denominator that fits in one byte is handled with divxu, one
; numerator byte per step, skipping leading zero bytes.  Anything
; larger goes through a shift-and-subtract loop.
divmodsi4:
	sub.w	S0,S0		; clear the remainder
	mov.w	S0,S1
	mov.b	A2H,S2H		; are the top three denominator bytes zero?
	or	A2L,S2H
	or	A3H,S2H
	bne	.Ldivmod_long
	mov.b	A0H,A0H		; start at the first non-zero numerator byte
	bne	.Ldivmod_byte0
	mov.b	A0L,A0L
	bne	.Ldivmod_byte1
	mov.b	A1H,A1H
	bne	.Ldivmod_byte2
	bra	.Ldivmod_byte3
	; Each step: S1 = carried remainder : numerator byte, then
	; divxu leaves the quotient byte in S1L and the remainder in S1H.
.Ldivmod_byte0:
	mov.b	A0H,S1L
	divxu	A3L,S1
	mov.b	S1L,A0H
.Ldivmod_byte1:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
.Ldivmod_byte2:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
.Ldivmod_byte3:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; move the final remainder down ...
	mov.b	#0x0,S1H	; ... and clear the byte above it
	rts

; The denominator needs more than eight bits, so the top quotient byte
; is zero: preload the remainder with the top numerator byte and
; divide by shift and test.
.Ldivmod_long:
	mov.b	A0H,S1L		; remainder = top numerator byte
	mov.b	A0L,A0H		; numerator <<= 8
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H		; only do 24 iterations

.Ldivmod_nextbit:
	add.w	A1,A1		; shift A0/A1 left; quotient bits enter at bit 0
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L		; shifted-out bit goes into the remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1		; remainder -= denominator
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	.Ldivmod_setone	; it fitted

	add.w	A3,S1		; it did not fit: undo the subtraction
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	.Ldivmod_nextbit
	rts

.Ldivmod_setone:
	inc	A1L		; record a one bit in the quotient
	dec	S2H
	bne	.Ldivmod_nextbit
	rts
590
591#else /* __H8300H__ */
592
593	;; This function also computes the remainder and stores it in er3.
594	.global	___udivsi3
595___udivsi3:
596	mov.w	A1E,A1E		; denominator top word 0?
597	bne	DenHighNonZero
598
599	; do it the easy way, see page 107 in manual
600	mov.w	A0E,A2
601	extu.l	A2P
602	divxu.w	A1,A2P
603	mov.w	A2E,A0E
604	divxu.w	A1,A0P
605	mov.w	A0E,A3
606	mov.w	A2,A0E
607	extu.l	A3P
608	rts
609
610 	; er0 = er0 / er1
611 	; er3 = er0 % er1
612 	; trashes er1 er2
613 	; expects er1 >= 2^16
614DenHighNonZero:
615	mov.l	er0,er3
616	mov.l	er1,er2
617#ifdef __H8300H__
618divmod_L21:
619	shlr.l	er0
620	shlr.l	er2		; make divisor < 2^16
621	mov.w	e2,e2
622	bne	divmod_L21
623#else
624	shlr.l	#2,er2		; make divisor < 2^16
625	mov.w	e2,e2
626	beq	divmod_L22A
627divmod_L21:
628	shlr.l	#2,er0
629divmod_L22:
630	shlr.l	#2,er2		; make divisor < 2^16
631	mov.w	e2,e2
632	bne	divmod_L21
633divmod_L22A:
634	rotxl.w	r2
635	bcs	divmod_L23
636	shlr.l	er0
637	bra	divmod_L24
638divmod_L23:
639	rotxr.w	r2
640	shlr.l	#2,er0
641divmod_L24:
642#endif
643	;; At this point,
644	;;  er0 contains shifted dividend
645	;;  er1 contains divisor
646	;;  er2 contains shifted divisor
647	;;  er3 contains dividend, later remainder
648	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
649	extu.l	er0
650	beq	divmod_L25
651	subs	#1,er0		; er0 = AQ - 1
652	mov.w	e1,r2
653	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
654	sub.w	r2,e3		; dividend - 65536 * er2
655	mov.w	r1,r2
656	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
657	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
658divmod_L25:
659 	cmp.l	er1,er3		; is divisor < remainder?
660	blo	divmod_L26
661 	adds	#1,er0
662	sub.l	er1,er3		; correct the remainder
663divmod_L26:
664	rts
665
666#endif
667#endif /* L_divsi3 */
668
669#ifdef L_mulhi3
670
671;; HImode multiply.
672; The H8/300 only has an 8*8->16 multiply.
673; The answer is the same as:
674;
675; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because that will fall off the top)
677; A0 in
678; A1 in
679; A0 answer
680
#ifdef __H8300__
;; ___mulhi3: A0 = A0 * A1, keeping the low 16 bits of the product.
;; A2 and A3 are used as scratch.
;;
;; Three 8x8 products are formed with mulxu.  The two cross products
;; only contribute their low byte, which is added into the high byte
;; of the result.
	.section .text
	.align 2
	.global	___mulhi3
___mulhi3:
	mov.b	A1L,A2L
	mulxu	A0L,A2		; A2 = A0L * A1L

	mov.b	A0H,A3L
	mulxu	A1L,A3		; A3 = A1L * A0H

	add.b	A3L,A2H		; add its low byte into bits 8-15

	mov.b	A1H,A3L
	mulxu	A0L,A3		; A3 = A0L * A1H

	add.b	A3L,A2H		; add its low byte into bits 8-15
	mov.w	A2,A0		; return the product
	rts

#endif
702#endif /* L_mulhi3 */
703
704#ifdef L_mulsi3
705
706;; SImode multiply.
707;;
708;; I think that shift and add may be sufficient for this.  Using the
709;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
710;; the inner loop uses maybe 20 cycles + overhead, but terminates
711;; quickly on small args.
712;;
713;; A0/A1 src_a
714;; A2/A3 src_b
715;;
716;;  while (a)
717;;    {
718;;      if (a & 1)
719;;        r += b;
720;;      a >>= 1;
721;;      b <<= 1;
722;;    }
723
724	.section .text
725	.align 2
726
727#ifdef __H8300__
728
729	.global	___mulsi3
730___mulsi3:
731	PUSHP	S0P
732	PUSHP	S1P
733
734	sub.w	S0,S0
735	sub.w	S1,S1
736
737	; while (a)
738_top:	mov.w	A0,A0
739	bne	_more
740	mov.w	A1,A1
741	beq	_done
742_more:	; if (a & 1)
743	bld	#0,A1L
744	bcc	_nobit
745	; r += b
746	add.w	A3,S1
747	addx	A2L,S0L
748	addx	A2H,S0H
749_nobit:
750	; a >>= 1
751	shlr	A0H
752	rotxr	A0L
753	rotxr	A1H
754	rotxr	A1L
755
756	; b <<= 1
757	add.w	A3,A3
758	addx	A2L,A2L
759	addx	A2H,A2H
760	bra 	_top
761
762_done:
763	mov.w	S0,A0
764	mov.w	S1,A1
765	POPP	S1P
766	POPP	S0P
767	rts
768
769#else /* __H8300H__ */
770
;
; ___mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; er0 = er0 * er1, keeping the low 32 bits of the product.
; er2 and er3 are used as scratch.
;
; Writing the operands as a:b (e0:r0) and c:d (e1:r1), the result is
; b * d plus the two cross products a * d and c * b added into the
; top word.  A cross product is skipped when its e0 or e1 factor is
; zero.
;
; Worst case timings of the old shift and add code:
;
; 16b * 16b = 372 states
; 32b * 32b = 724 states
;
; Timings of this code:
;
; 16b * 16b =  48 states
; 16b * 32b =  72 states
; 32b * 32b =  92 states
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2		; ( 2 states) b * d
	mulxu	r0,er2		; (22 states)

	mov.w	e0,r3		; ( 2 states) a * d
	beq	.Lmulsi3_no_ad	; ( 4 states) a is zero
	mulxu	r1,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

.Lmulsi3_no_ad:
	mov.w	e1,r3		; ( 2 states) c * b
	beq	.Lmulsi3_no_cb	; ( 4 states) c is zero
	mulxu	r0,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

.Lmulsi3_no_cb:
	mov.l	er2,er0		; ( 2 states) return the product
	rts			; (10 states)
807
808#endif
809#endif /* L_mulsi3 */
810#ifdef L_fixunssfsi_asm
811/* For the h8300 we use asm to save some bytes, to
812   allow more programs to fit into the tiny address
813   space.  For the H8/300H and H8S, the C version is good enough.  */
814#ifdef __H8300__
815/* We still treat NANs different than libgcc2.c, but then, the
816   behavior is undefined anyways.  */
817	.global	___fixunssfsi
818___fixunssfsi:
819	cmp.b #0x4f,r0h
820	bge Large_num
821	jmp     @___fixsfsi
822Large_num:
823	bhi L_huge_num
824	xor.b #0x80,A0L
825	bmi L_shift8
826L_huge_num:
827	mov.w #65535,A0
828	mov.w A0,A1
829	rts
830L_shift8:
831	mov.b A0L,A0H
832	mov.b A1H,A0L
833	mov.b A1L,A1H
834	mov.b #0,A1L
835	rts
836#endif
837#endif /* L_fixunssfsi_asm */
838