1// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 2// Permission is hereby granted, free of charge, to any person 3// obtaining a copy of this software and associated documentation 4// files (the "Software"), to deal in the Software without 5// restriction, including without limitation the rights to use, 6// copy, modify, merge, publish, distribute, sublicense, and/or sell 7// copies of the Software, and to permit persons to whom the 8// Software is furnished to do so, subject to the following 9// conditions: 10// 11// The above copyright notice and this permission notice shall be 12// included in all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 19// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21// OTHER DEALINGS IN THE SOFTWARE. 

// Pointer-width dependent operations.  With _LP64 the env argument is
// already a 64-bit address and pointer fields are 8 bytes wide; otherwise
// the 32-bit argument is widened with addp4 and pointer fields are
// 4 bytes wide.
#if defined(_LP64)
#define SWIZZLE	add
#define STPTR	st8
#else
#define SWIZZLE	addp4
#define STPTR	st4
#endif

// Scratch register assignments shared by both routines in this file.
rRP	= r14		// return address (copy of b0 / saved ip)
rPFS	= r15		// ar.pfs / saved cfm
rUNAT	= r16		// ar.unat
rRNAT	= r17		// ar.rnat
rENV0	= r18		// env pointer, first store/load cursor
rENV1	= r19		// second cursor into env
rENV2	= r20		// third cursor into env
rNSLOT	= r21		// number of backing-store slots (pfs.sol, adjusted)
rBSP	= r22		// ar.bsp
rPBSP	= r23		// previous frame's bsp
rRSC	= r24		// ar.rsc as found
rNATP	= r25		// 0x1f8 mask selecting bsp{8:3}
rBIAS	= r26		// slot index used for the NaT-collection adjustment
rRSC0	= r27		// ar.rsc with the mode field cleared
rTMP1	= r28
rTMP2	= r29
rTMP3	= r30
rTMP4	= r31
rTMP5	= r8
rMYPFS	= r9		// ar.pfs of uwx_self_install_context itself
rPSP	= r10		// sp to be installed

// Bits of the valid_regs word stored at env+0.
VALID_IP	= (1)
VALID_SP	= (1 << 1)
VALID_BSP	= (1 << 2)
VALID_CFM	= (1 << 3)
VALID_PREDS	= (1 << 7)
VALID_PRIUNAT	= (1 << 8)
VALID_RNAT	= (1 << 10)
VALID_UNAT	= (1 << 11)
VALID_FPSR	= (1 << 12)
VALID_LC	= (1 << 13)
VALID_GRS	= (0xf << 16)
VALID_BRS	= (0x1f << 20)

// Combined masks.
VALID_BASIC4	= VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
VALID_SPEC	= VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
VALID_REGS	= VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS
VALID_FRS	= 0xfffff

// valid_regs and valid_frs are two adjacent unsigned int fields.  They
// are written together with one st8, so the half of the 64-bit value
// each mask occupies depends on the byte order.
#if defined(__LITTLE_ENDIAN__)
VALID_BITS	= (VALID_FRS << 32) | VALID_REGS
#else
VALID_BITS	= (VALID_REGS << 32) | VALID_FRS
#endif

	.text

// int uwx_self_init_context(struct uwx_env *env);
//
// Records a snapshot of the caller's context in the uwx_env structure.

// uwx_self_init_context -- implementation notes
//
//   In:   r32  = env (widened with SWIZZLE before use)
//   Out:  ret0 = 0 (UWX_OK)
//
// The routine makes no calls and allocates no locals or outputs.
// ar.unat is captured on entry because the st8.spill instructions
// update it; the captured value is stored in the context and written
// back to ar.unat before returning.  ar.rsc is switched to enforced
// lazy mode only around the read of ar.rnat.
//
// Byte offsets from env written by the stores below:
//     0  valid_regs/valid_frs     8  ip         16  sp        24  bsp
//    32  cfm                     64  preds      72  priunat   88  ar.rnat
//    96  ar.unat                104  ar.fpsr   112  ar.lc
//   136  r4-r7                  168  b1-b5
//   208  f2-f5, f16-f31 (16 bytes each)
//   528  rstate

	.proc	uwx_self_init_context
	.global uwx_self_init_context
uwx_self_init_context:
	.prologue
	alloc	rPFS = ar.pfs, 1, 0, 0, 0
	mov	rUNAT = ar.unat
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;
	flushrs				// caller's stacked regs -> backing store
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	mov	rRP = b0
	;;
	mov	rRSC = ar.rsc
	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
	;;
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	adds	rNATP = 0x1f8, r0	// mask for bsp{8:3}
	mov	rTMP1 = b1
	;;

	// Preserved general registers r4-r7 (NaT bits go to ar.unat)
	st8.spill [rENV1] = r4, 16	// env+136: r4
	st8.spill [rENV2] = r5, 16	// env+144: r5
	mov	rTMP2 = b2
	;;
	st8.spill [rENV1] = r6, 16	// env+152: r6
	st8.spill [rENV2] = r7, 16	// env+160: r7
	mov	rTMP3 = b3
	;;

	// Preserved branch registers b1-b5
	st8	[rENV1] = rTMP1, 16	// env+168: b1
	st8	[rENV2] = rTMP2, 16	// env+176: b2
	mov	rTMP1 = b4
	;;
	st8	[rENV1] = rTMP3, 16	// env+184: b3
	st8	[rENV2] = rTMP1, 16	// env+192: b4
	mov	rTMP2 = b5
	;;
	st8	[rENV1] = rTMP2		// env+200: b5
	mov	ar.rsc = rRSC0		// enforced lazy mode
	add	rENV1 = 8, rENV0	// rENV1 = env+8
	;;
	mov	rRNAT = ar.rnat		// get copy of ar.rnat
	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
					// preds, priunat, rnat, unat, fpsr,
					// lc, grs, brs (VALID_REGS = 0x1ff3d8f)
					// valid_frs: VALID_FRS = 0xfffff
					// (packed according to byte order)
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc
	mov	rBSP = ar.bsp
	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
	;;

	// priunat = ar.unat rotated right by the UNAT bit position that
	// corresponds to the spill address of r4 (address bits 8:3).
	mov	rTMP2 = ar.unat
	nop
	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
	;;
	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shr	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos)
	;;
	nop
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_right(unat, bitpos)
	nop
	mov	rTMP4 = pr
	;;
	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs mask
	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
	;;

	// Walking back nslots registers from bsp crosses one NaT
	// collection slot if nslots exceeds bsp's slot index, and a
	// second one if it exceeds it by more than 63.
	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
	;;
	st8	[rENV0] = r12, 48	// env+16: sp
	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
(p6)	add	rNSLOT = 1, rNSLOT	// ... nslots++
	;;
	st8	[rENV0] = rTMP4, 24	// env+64: preds
	st8	[rENV1] = rTMP2, 24	// env+72: priunat
(p7)	add	rNSLOT = 1, rNSLOT	// ... nslots++
	;;
	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
	mov	rTMP3 = ar.fpsr
	mov	rTMP1 = ar.lc
	;;
	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
	add	rENV2 = 320, rENV2	// rENV2 = &env->context.rstate
	;;
	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
	STPTR	[rENV2] = r0		// env+528: env->rstate = 0
	nop
	;;

	// Preserved floating-point registers f2-f5 and f16-f31.
	// rENV0 = env+208 and rENV1 = env+224 here; each advances by 32.
	// THIS CODE NEEDS TO BE SCHEDULED!!!
	stf.spill [rENV0] = f2, 32	// env+208: f2
	stf.spill [rENV1] = f3, 32	// env+224: f3
	;;
	stf.spill [rENV0] = f4, 32	// env+240: f4
	stf.spill [rENV1] = f5, 32	// env+256: f5
	;;
	stf.spill [rENV0] = f16, 32	// env+272: f16
	stf.spill [rENV1] = f17, 32	// env+288: f17
	;;
	stf.spill [rENV0] = f18, 32	// env+304: f18
	stf.spill [rENV1] = f19, 32	// env+320: f19
	;;
	stf.spill [rENV0] = f20, 32	// env+336: f20
	stf.spill [rENV1] = f21, 32	// env+352: f21
	;;
	stf.spill [rENV0] = f22, 32	// env+368: f22
	stf.spill [rENV1] = f23, 32	// env+384: f23
	;;
	stf.spill [rENV0] = f24, 32	// env+400: f24
	stf.spill [rENV1] = f25, 32	// env+416: f25
	;;
	stf.spill [rENV0] = f26, 32	// env+432: f26
	stf.spill [rENV1] = f27, 32	// env+448: f27
	;;
	stf.spill [rENV0] = f28, 32	// env+464: f28
	stf.spill [rENV1] = f29, 32	// env+480: f29
	;;
	stf.spill [rENV0] = f30, 32	// env+496: f30
	stf.spill [rENV1] = f31, 32	// env+512: f31
	;;
	mov	ar.unat = rUNAT		// undo the st8.spill updates to ar.unat
	mov	ret0 = r0		// return UWX_OK
	br.ret.sptk b0
	.endp

// uwx_self_install_context(
//		struct uwx_env *env,
//		uint64_t r15,
//		uint64_t r16,
//		uint64_t r17,
//		uint64_t r18,
//		uint64_t ret
//		);
//
// Installs the given context, and sets the landing pad binding
// registers r15-r18 to the values given.
// Returns the value "ret" to the new context (for testing --
// when transferring to a landing pad, the new context won't
// care about the return value).
//
//   In:   r32 = env, r33-r36 = values for r15-r18, r37 = ret
//
// The routine reloads r4-r7, b1-b5, ar.unat, ar.rnat, ar.pfs,
// ar.bspstore, the predicates, ar.lc, ar.fpsr, gp, f2-f5 and f16-f31
// from env, then loads sp and b0 from the saved sp and ip and leaves
// through br.ret.  The env offsets read are the ones written by
// uwx_self_init_context, plus gp at env+128.

	.proc	uwx_self_install_context
	.global uwx_self_install_context
uwx_self_install_context:
	.prologue
	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;

	// THIS CODE NEEDS TO BE SCHEDULED!!!

	// Restore GR 4-7 and ar.unat.  priunat is rotated left by the
	// UNAT bit position of &gr[0] so the ld8.fill instructions find
	// each NaT bit at the position selected by the fill address.
	add	rENV1 = 136, rENV0	// &env->context.gr[0]
	add	rENV2 = 72, rENV0	// &env->context.priunat
	;;
	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
	;;
	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shl	rTMP5 = rTMP2, rTMP3	// (unat << bitpos)
	;;
	shr	rTMP2 = rTMP2, rTMP4	// (unat >> (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_left(unat, bitpos)
	;;
	mov	ar.unat = rTMP2		// put priunat in place
	;;
	ld8.fill r4 = [rENV1], 16	// env+136: r4
	ld8.fill r5 = [rENV2], 16	// env+144: r5
	;;
	ld8.fill r6 = [rENV1], 16	// env+152: r6
	ld8.fill r7 = [rENV2], 16	// env+160: r7
	;;
	mov	ar.unat = rUNAT		// restore real ar.unat

	// Restore BR 1-5
	ld8	rTMP1 = [rENV1], 16	// env+168: b1
	ld8	rTMP2 = [rENV2], 16	// env+176: b2
	;;
	ld8	rTMP3 = [rENV1], 16	// env+184: b3
	ld8	rTMP4 = [rENV2], -168	// env+192: b4
	mov	b1 = rTMP1
	;;
	ld8	rTMP1 = [rENV1], -168	// env+200: b5
	mov	b2 = rTMP2
	mov	b3 = rTMP3
	mov	b4 = rTMP4
	;;
	mov	b5 = rTMP1

	// Restore ar.bsp, ar.pfs, and ar.rnat
	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
	mov	rRSC = ar.rsc
	adds	rBIAS = 0x1f8, r0	// mask for bsp{8:3}
	;;
	flushrs
	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	;;
	mov	ar.rsc = rRSC0		// enforced lazy mode
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	;;
	invala
	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
	;;
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	;;
	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
	;;

	// Advancing nslots registers from prev_bsp skips (bias / 63)
	// NaT collection slots, so the adjustments apply from bias == 63
	// and bias == 126 inclusive.  This is the exact inverse of the
	// backward computation in uwx_self_init_context.
	cmp.lt	p6, p0 = 62, rBIAS	// if (63 <= bias) ...
	cmp.lt	p7, p0 = 125, rBIAS	// if (126 <= bias) ...
	;;
(p6)	add	rNSLOT = 1, rNSLOT	// ... nslots++
	;;
(p7)	add	rNSLOT = 1, rNSLOT	// ... nslots++
	;;
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
	;;
	mov	ar.bspstore = rBSP	// restore ar.bsp
	;;
	mov	ar.rnat = rRNAT		// restore ar.rnat
	mov	ar.pfs = rPFS		// restore ar.pfs
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc

	// Restore preds and ar.lc
	ld8	rTMP1 = [rENV1], -56	// env+64: preds
	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
	;;
	mov	pr = rTMP1
	mov	ar.lc = rTMP2

	// Get previous sp and ip
	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
	ld8	rPSP = [rENV2], 112	// env+16: sp
	;;

	// Restore ar.fpsr and gp
	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
	ld8	r1 = [rENV2], 96	// env+128: gp
	;;
	mov	ar.fpsr = rTMP1		// restore ar.fpsr

	// Restore FR 2-5 and 16-31.
	// rENV1 = env+208 and rENV2 = env+224 here; each advances by 32.
	ldf.fill f2 = [rENV1], 32	// env+208: f2
	ldf.fill f3 = [rENV2], 32	// env+224: f3
	;;
	ldf.fill f4 = [rENV1], 32	// env+240: f4
	ldf.fill f5 = [rENV2], 32	// env+256: f5
	;;
	ldf.fill f16 = [rENV1], 32	// env+272: f16
	ldf.fill f17 = [rENV2], 32	// env+288: f17
	;;
	ldf.fill f18 = [rENV1], 32	// env+304: f18
	ldf.fill f19 = [rENV2], 32	// env+320: f19
	;;
	ldf.fill f20 = [rENV1], 32	// env+336: f20
	ldf.fill f21 = [rENV2], 32	// env+352: f21
	;;
	ldf.fill f22 = [rENV1], 32	// env+368: f22
	ldf.fill f23 = [rENV2], 32	// env+384: f23
	;;
	ldf.fill f24 = [rENV1], 32	// env+400: f24
	ldf.fill f25 = [rENV2], 32	// env+416: f25
	;;
	ldf.fill f26 = [rENV1], 32	// env+432: f26
	ldf.fill f27 = [rENV2], 32	// env+448: f27
	;;
	ldf.fill f28 = [rENV1], 32	// env+464: f28
	ldf.fill f29 = [rENV2], 32	// env+480: f29
	;;
	ldf.fill f30 = [rENV1], 32	// env+496: f30
	ldf.fill f31 = [rENV2], 32	// env+512: f31

	// Set landing pad parameter registers.  r15-r18 double as
	// rPFS/rUNAT/rRNAT/rENV0, none of which is used past this point.
	mov	r15 = r33
	mov	r16 = r34
	mov	r17 = r35
	mov	r18 = r36

	// Restore previous sp and Return
	mov	ret0 = r37
	mov	sp = rPSP
	mov	b0 = rRP
	;;				// b0 is written above and read by br.ret
	br.ret.sptk b0

	.endp