1// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
2// Permission is hereby granted, free of charge, to any person
3// obtaining a copy of this software and associated documentation
4// files (the "Software"), to deal in the Software without
5// restriction, including without limitation the rights to use,
6// copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the
8// Software is furnished to do so, subject to the following
9// conditions:
10//
11// The above copyright notice and this permission notice shall be
12// included in all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21// OTHER DEALINGS IN THE SOFTWARE.
22
// SWIZZLE copies the env pointer argument into a working register;
// STPTR stores a pointer-sized field.  With _LP64 these are a plain
// add and an 8-byte store; otherwise addp4 (the 32-bit pointer add) and
// a 4-byte store are used.
23#ifdef _LP64
24#define SWIZZLE add
25#define STPTR st8
26#else
27#define SWIZZLE addp4
28#define STPTR st4
29#endif
30
// Symbolic names for the general registers used as temporaries by both
// procedures in this file.  Note that r15-r18 (rPFS, rUNAT, rRNAT, rENV0)
// are also the landing pad registers that uwx_self_install_context
// overwrites just before it returns, and that r8 (rTMP5) is ret0.
31rRP	= r14	// return address: copy of b0 (init) / saved ip (install)
32rPFS	= r15	// ar.pfs of the caller (init) / saved cfm (install)
33rUNAT	= r16	// ar.unat at entry (init) / saved ar.unat (install)
34rRNAT	= r17	// ar.rnat value
35rENV0	= r18	// env pointer; also used as a walking store pointer
36rENV1	= r19	// walking pointer into env
37rENV2	= r20	// walking pointer into env
38rNSLOT	= r21	// pfs.sol, later adjusted to a backing-store slot count
39rBSP	= r22	// ar.bsp: read (init) / computed (install)
40rPBSP	= r23	// bsp of the previous frame
41rRSC	= r24	// ar.rsc as found
42rNATP	= r25	// constant 0x1f8 (init only)
43rBIAS	= r26	// slot-index temporary for the nslots adjustment
44rRSC0	= r27	// ar.rsc with the mode bits cleared
45rTMP1	= r28
46rTMP2	= r29
47rTMP3	= r30
48rTMP4	= r31
49rTMP5	= r8
50rMYPFS	= r9	// alloc destination in install; not used afterwards
51rPSP	= r10	// saved sp (install only)
52
// Bit flags combined into VALID_REGS, the value stored in the valid_regs
// word at the start of the env structure by uwx_self_init_context.
53VALID_IP      = 1
54VALID_SP      = 1 << 1
55VALID_BSP     = 1 << 2
56VALID_CFM     = 1 << 3
57VALID_PREDS   = 1 << 7
58VALID_PRIUNAT = 1 << 8
59VALID_RNAT    = 1 << 10
60VALID_UNAT    = 1 << 11
61VALID_FPSR    = 1 << 12
62VALID_LC      = 1 << 13
63VALID_GRS     = 0xf << 16
64VALID_BRS     = 0x1f << 20
65VALID_BASIC4  = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
66VALID_SPEC    = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
67VALID_REGS    = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS	// = 0x01ff3d8f
68VALID_FRS     = 0xfffff
69// valid_regs and valid_frs are separate unsigned int fields.
70// In order to store them with a single st8, we need to know
71// the endianness.
// VALID_BITS places VALID_REGS in whichever half of the 8-byte value
// lands at the lower address, i.e. in the valid_regs field.
72#ifdef __LITTLE_ENDIAN__
73VALID_BITS   = (VALID_FRS << 32) | VALID_REGS
74#else
75VALID_BITS   = (VALID_REGS << 32) | VALID_FRS
76#endif
77
78	.text
79
80// int uwx_self_init_context(struct uwx_env *env);
81//
82// Stores a snapshot of the caller's context in the uwx_env structure.
//
// In:   r32  = env
// Out:  ret0 = 0 (UWX_OK)
//
// Values recorded (byte offsets from env, as used by the stores below):
//   +0   valid_regs / valid_frs     +8   ip  (this procedure's b0)
//   +16  sp  (r12)                  +24  bsp of the caller's frame
//   +32  cfm (ar.pfs from alloc)    +64  preds
//   +72  priunat                    +88  ar.rnat
//   +96  ar.unat (entry value)      +104 ar.fpsr
//   +112 ar.lc                      +136 r4-r7 (spilled)
//   +168 b1-b5                      +208 f2-f5, f16-f31 (16 bytes each)
//   +528 rstate (cleared, pointer-sized)
//
// ar.unat is changed by the st8.spill instructions and is put back to
// its entry value before returning.  ar.rsc is switched to enforced
// lazy mode only for the read of ar.rnat and is then restored.
83
84	.proc	uwx_self_init_context
85	.global uwx_self_init_context
86uwx_self_init_context:
87	.prologue
88	alloc	rPFS = ar.pfs, 1, 0, 0, 0
89	mov	rUNAT = ar.unat
90	.body
91	SWIZZLE	rENV0 = r0, r32		// rENV0 = env (first argument)
92	;;
93	flushrs
94	extr.u	rNSLOT = rPFS, 7, 7 	// nslots = pfs.sol
95	mov	rRP = b0
96	;;
97	mov	rRSC = ar.rsc
98	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
99	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
100	;;
101	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
102	adds	rNATP = 0x1f8, r0	// mask selecting address bits 8:3
103	mov	rTMP1 = b1
104	;;
	// Spill r4-r7 and save b1-b5.  rENV1 and rENV2 alternate as store
	// pointers, each advancing by 16 so that together they cover
	// consecutive 8-byte fields.
105	st8.spill [rENV1] = r4, 16	// env+136: r4
106	st8.spill [rENV2] = r5, 16	// env+144: r5
107	mov	rTMP2 = b2
108	;;
109	st8.spill [rENV1] = r6, 16	// env+152: r6
110	st8.spill [rENV2] = r7, 16	// env+160: r7
111	mov	rTMP3 = b3
112	;;
113	st8	[rENV1] = rTMP1, 16	// env+168: b1
114	st8	[rENV2] = rTMP2, 16	// env+176: b2
115	mov	rTMP1 = b4
116	;;
117	st8	[rENV1] = rTMP3, 16	// env+184: b3
118	st8	[rENV2] = rTMP1, 16	// env+192: b4 (rENV2 is left at env+208)
119	mov	rTMP2 = b5
120	;;
121	st8	[rENV1] = rTMP2		// env+200: b5
122	mov	ar.rsc = rRSC0		// enforced lazy mode
123	add	rENV1 = 8, rENV0	// rENV1 = env+8
124	;;
125	mov	rRNAT = ar.rnat		// get copy of ar.rnat
126	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
127					// preds, priunat, rnat, unat, fpsr,
128					// lc, grs, brs (= 0x01ff3d8f);
129					// valid_frs = 0x000fffff
130	;;
131	mov	ar.rsc = rRSC		// restore ar.rsc
132	mov	rBSP = ar.bsp
133	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
134	;;
	// priunat is ar.unat (as left by the four spills above) rotated
	// right by the bit position that corresponds to the address where
	// r4 was spilled.
135	mov	rTMP2 = ar.unat
136	nop
137	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
138	;;
139	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
140	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
141	shr	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos)
142	;;
143	nop
144	extr.u	rBIAS = rBIAS, 3, 6	//   ... div 8
145	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
146	;;
147	or	rTMP2 = rTMP2, rTMP5	// rotate_right(unat, bitpos)
148	nop
149	mov	rTMP4 = pr
150	;;
151	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs and valid_frs
152	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
153	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
154	;;
	// nslots is incremented once for each threshold that bias exceeds
	// (0 and 63) before it is converted to a byte distance from bsp.
155	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
156	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
157	;;
158	st8	[rENV0] = r12, 48	// env+16: sp
159	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
160(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
161	;;
162	st8	[rENV0] = rTMP4, 24	// env+64: preds
163	st8	[rENV1] = rTMP2, 24	// env+72: priunat
164(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
165	;;
166	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
167	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
168	dep.z	rTMP3 = rNSLOT, 3, 7 	// (nslots << 3)
169	;;
170	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
171	mov	rTMP3 = ar.fpsr
172	mov	rTMP1 = ar.lc
173	;;
174	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
175	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
176	add	rENV2 = 320, rENV2	// rENV2 = env+528 (208 + 320): rstate
177	;;
178	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
179	STPTR	[rENV2] = r0		// env+528: env->rstate = 0
180	nop
181	;;
182	// THIS CODE NEEDS TO BE SCHEDULED!!!
	// Spill f2-f5 and f16-f31.  rENV0 starts at env+208 and rENV1 at
	// env+224; each advances by 32, so the two interleave in 16-byte
	// steps.
183	stf.spill [rENV0] = f2, 32	// env+208: f2
184	stf.spill [rENV1] = f3, 32	// env+224: f3
185	;;
186	stf.spill [rENV0] = f4, 32	// env+240: f4
187	stf.spill [rENV1] = f5, 32	// env+256: f5
188	;;
189	stf.spill [rENV0] = f16, 32	// env+272: f16
190	stf.spill [rENV1] = f17, 32	// env+288: f17
191	;;
192	stf.spill [rENV0] = f18, 32	// env+304: f18
193	stf.spill [rENV1] = f19, 32	// env+320: f19
194	;;
195	stf.spill [rENV0] = f20, 32	// env+336: f20
196	stf.spill [rENV1] = f21, 32	// env+352: f21
197	;;
198	stf.spill [rENV0] = f22, 32	// env+368: f22
199	stf.spill [rENV1] = f23, 32	// env+384: f23
200	;;
201	stf.spill [rENV0] = f24, 32	// env+400: f24
202	stf.spill [rENV1] = f25, 32	// env+416: f25
203	;;
204	stf.spill [rENV0] = f26, 32	// env+432: f26
205	stf.spill [rENV1] = f27, 32	// env+448: f27
206	;;
207	stf.spill [rENV0] = f28, 32	// env+464: f28
208	stf.spill [rENV1] = f29, 32	// env+480: f29
209	;;
210	stf.spill [rENV0] = f30, 32	// env+496: f30
211	stf.spill [rENV1] = f31, 32	// env+512: f31
212	;;
213	mov	ar.unat = rUNAT		// put back the entry value of ar.unat
214	mov	ret0 = r0		// return UWX_OK
215	br.ret.sptk b0
216	.endp
217
218// uwx_self_install_context(
219//		struct uwx_env *env,
220//		uint64_t r15,
221//		uint64_t r16,
222//		uint64_t r17,
223//		uint64_t r18,
224//		uint64_t ret
225//		);
226//
227// Installs the given context, and sets the landing pad binding
228// registers r15-r18 to the values given.
229// Returns the value "ret" to the new context (for testing --
230// when transferring to a landing pad, the new context won't
231// care about the return value).
//
// In:   r32 = env, r33-r36 = values for r15-r18, r37 = value for ret0
//
// This procedure does not return to its own caller: it loads b0 with
// the ip stored at env+8, replaces sp, ar.bsp, ar.pfs, ar.rnat, ar.unat,
// ar.fpsr, ar.lc, pr, gp, r4-r7, b1-b5, f2-f5 and f16-f31 with the
// values stored in env, and then executes br.ret.
//
// The env offsets read here are the ones written by
// uwx_self_init_context, plus env+128 (gp), which that procedure does
// not write.
232
233	.proc	uwx_self_install_context
234	.global uwx_self_install_context
235uwx_self_install_context:
236	.prologue
237	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0
238	.body
239	SWIZZLE	rENV0 = r0, r32		// rENV0 = env (first argument)
240	;;
241
242	// THIS CODE NEEDS TO BE SCHEDULED!!!
243
244	// Restore GR 4-7 and ar.unat
	// The stored priunat is rotated left by the bit position of the
	// r4 spill address and placed in ar.unat for the ld8.fill
	// instructions; the saved ar.unat is installed afterwards.
245	add	rENV1 = 136, rENV0	// &env->context.gr[0]
246	add	rENV2 = 72, rENV0	// &env->context.priunat
247	;;
248	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
249	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
250	;;
251	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat (rENV2 -> env+144)
252	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
253	shl	rTMP5 = rTMP2, rTMP3	// (priunat << bitpos)
254	;;
255	shr	rTMP2 = rTMP2, rTMP4	// (priunat >> (64 - bitpos))
256	;;
257	or	rTMP2 = rTMP2, rTMP5	// rotate_left(priunat, bitpos)
258	;;
259	mov	ar.unat = rTMP2		// put priunat in place
260	;;
261	ld8.fill r4 = [rENV1], 16	// env+136: r4
262	ld8.fill r5 = [rENV2], 16	// env+144: r5
263	;;
264	ld8.fill r6 = [rENV1], 16	// env+152: r6
265	ld8.fill r7 = [rENV2], 16	// env+160: r7
266	;;
267	mov	ar.unat = rUNAT		// restore real ar.unat
268
269	// Restore BR 1-5
270	ld8	rTMP1 = [rENV1], 16	// env+168: b1
271	ld8	rTMP2 = [rENV2], 16	// env+176: b2
272	;;
273	ld8	rTMP3 = [rENV1], 16	// env+184: b3
274	ld8	rTMP4 = [rENV2], -168	// env+192: b4 (rENV2 -> env+24)
275	mov	b1 = rTMP1
276	;;
277	ld8	rTMP1 = [rENV1], -168	// env+200: b5 (rENV1 -> env+32)
278	mov	b2 = rTMP2
279	mov	b3 = rTMP3
280	mov	b4 = rTMP4
281	;;
282	mov	b5 = rTMP1
283
284	// Restore ar.bsp, ar.pfs, and ar.rnat
	// The new bsp is computed from the stored previous bsp plus
	// pfs.sol slots; nslots is incremented once for each threshold
	// (63 and 126) that slot-index + nslots exceeds.  ar.bspstore and
	// ar.rnat are written while ar.rsc is in enforced lazy mode.
285	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
286	mov	rRSC = ar.rsc
287	adds	rBIAS = 0x1f8, r0	// mask selecting address bits 8:3
288	;;
289	flushrs
290	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
291	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
292	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
293	;;
294	mov	ar.rsc = rRSC0		// enforced lazy mode
295	extr.u	rNSLOT = rPFS, 7, 7 	// nslots = pfs.sol
296	;;
297	invala
298	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
299	;;
300	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
301	;;
302	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
303	;;
304	cmp.lt	p6, p0 = 63, rBIAS	// if (63 < bias) ...
305	cmp.lt	p7, p0 = 126, rBIAS	// if (126 < bias) ...
306	;;
307(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
308	;;
309(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
310	;;
311	dep.z	rTMP3 = rNSLOT, 3, 7 	// (nslots << 3)
312	;;
313	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
314	;;
315	mov	ar.bspstore = rBSP	// restore ar.bsp (via ar.bspstore)
316	;;
317	mov	ar.rnat = rRNAT		// restore ar.rnat
318	mov	ar.pfs = rPFS		// restore ar.pfs
319	;;
320	mov	ar.rsc = rRSC		// restore ar.rsc
321
322	// Restore preds and ar.lc
323	ld8	rTMP1 = [rENV1], -56	// env+64: preds
324	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
325	;;
326	mov	pr = rTMP1
327	mov	ar.lc = rTMP2
328
329	// Get previous sp and ip
330	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
331	ld8	rPSP = [rENV2], 112	// env+16: sp
332	;;
333
334	// Restore ar.fpsr and gp
	// NOTE(review): env+128 is not stored by uwx_self_init_context in
	// this file; presumably another part of the library fills it in
	// before this procedure is called -- confirm.
335	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
336	ld8	r1 = [rENV2], 96	// env+128: gp
337	;;
338	mov	ar.fpsr = rTMP1		// restore ar.fpsr
339
340	// Restore FR 2-5 and 16-31
	// rENV1 starts at env+208 and rENV2 at env+224; each advances by
	// 32, so the two interleave in 16-byte steps.
341	ldf.fill f2 = [rENV1], 32	// env+208: f2
342	ldf.fill f3 = [rENV2], 32	// env+224: f3
343	;;
344	ldf.fill f4 = [rENV1], 32	// env+240: f4
345	ldf.fill f5 = [rENV2], 32	// env+256: f5
346	;;
347	ldf.fill f16 = [rENV1], 32	// env+272: f16
348	ldf.fill f17 = [rENV2], 32	// env+288: f17
349	;;
350	ldf.fill f18 = [rENV1], 32	// env+304: f18
351	ldf.fill f19 = [rENV2], 32	// env+320: f19
352	;;
353	ldf.fill f20 = [rENV1], 32	// env+336: f20
354	ldf.fill f21 = [rENV2], 32	// env+352: f21
355	;;
356	ldf.fill f22 = [rENV1], 32	// env+368: f22
357	ldf.fill f23 = [rENV2], 32	// env+384: f23
358	;;
359	ldf.fill f24 = [rENV1], 32	// env+400: f24
360	ldf.fill f25 = [rENV2], 32	// env+416: f25
361	;;
362	ldf.fill f26 = [rENV1], 32	// env+432: f26
363	ldf.fill f27 = [rENV2], 32	// env+448: f27
364	;;
365	ldf.fill f28 = [rENV1], 32	// env+464: f28
366	ldf.fill f29 = [rENV2], 32	// env+480: f29
367	;;
368	ldf.fill f30 = [rENV1], 32	// env+496: f30
369	ldf.fill f31 = [rENV2], 32	// env+512: f31
370
371	// Set landing pad parameter registers
	// r15-r18 are the registers aliased as rPFS, rUNAT, rRNAT and
	// rENV0; none of those aliases is read after this point.
372	mov	r15 = r33
373	mov	r16 = r34
374	mov	r17 = r35
375	mov	r18 = r36
376
377	// Restore previous sp and Return
	// NOTE(review): there is no explicit stop between the write of b0
	// and the br.ret that reads it; this appears to rely on the
	// assembler inserting one automatically -- confirm for the
	// assembler and mode used by the build.
378	mov	ret0 = r37
379	mov	sp = rPSP
380	mov	b0 = rRP
381	br.ret.sptk b0
382
383	.endp
384