1//  z_AIX_asm.S:  - microtasking routines specifically
2//                  written for Power platforms running AIX OS
3
4//
5////===----------------------------------------------------------------------===//
6////
7//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8//// See https://llvm.org/LICENSE.txt for license information.
9//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10////
11////===----------------------------------------------------------------------===//
12//
13
14// -----------------------------------------------------------------------
15// macros
16// -----------------------------------------------------------------------
17
18#include "kmp_config.h"
19
20#if KMP_OS_AIX
21//------------------------------------------------------------------------
22// int
23// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
24//                         int gtid, int tid,
25//                         int argc, void *p_argv[]
26// #if OMPT_SUPPORT
27//                         ,
28//                         void **exit_frame_ptr
29// #endif
30//                       ) {
31// #if OMPT_SUPPORT
32//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
33// #endif
34//
35//   (*pkfn)( & gtid, & tid, p_argv[0], ... );
36//
37// // FIXME: This is done at call-site and can be removed here.
38// #if OMPT_SUPPORT
39//   *exit_frame_ptr = 0;
40// #endif
41//
42//   return 1;
43// }
44//
45// parameters:
46//   r3: pkfn
47//   r4: gtid
48//   r5: tid
49//   r6: argc
50//   r7: p_argv
51//   r8: &exit_frame
52//
53// return:  r3 (always 1/TRUE)
54//
55
56#if KMP_ARCH_PPC64_XCOFF
57
58    .globl  __kmp_invoke_microtask[DS]
59    .globl  .__kmp_invoke_microtask
60    .align  4
61    .csect __kmp_invoke_microtask[DS],3
62    .vbyte  8, .__kmp_invoke_microtask
63    .vbyte  8, TOC[TC0]
64    .vbyte  8, 0
65    .csect .text[PR],2
66    .machine "pwr7"
67.__kmp_invoke_microtask:
68
69
70// -- Begin __kmp_invoke_microtask
71// mark_begin;
72
73// We need to allocate a stack frame large enough to hold all of the parameters
74// on the stack for the microtask plus what this function needs. That's 48
75// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for
76// the parameters to the microtask (gtid, tid, argc elements of p_argv),
77// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31.
78// With OMP-T support, we need an additional 8 bytes to save r30 to hold
79// a copy of r8.
80// Stack offsets relative to stack pointer:
81//   r31: -8, r30: -16, gtid: -20, tid: -24
82
83    mflr 0
84    std 31, -8(1)      # Save r31 to the stack
85    std 0, 16(1)       # Save LR to the linkage area
86
87// This is unusual because normally we'd set r31 equal to r1 after the stack
88// frame is established. In this case, however, we need to dynamically compute
89// the stack frame size, and so we keep a direct copy of r1 to access our
90// register save areas and restore the r1 value before returning.
91    mr 31, 1
92
93// Compute the size of the "argc" portion of the parameter save area.
94// The parameter save area is always at least 64 bytes long (i.e. 8 regs)
95// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
96// to allocate 8*6 bytes, not 8*argc.
97    li 0, 6
98    cmpwi 0, 6, 6
99    iselgt 0, 6, 0     # r0 = (argc > 6)? argc : 6
100    sldi 0, 0, 3       # r0 = 8 * max(argc, 6)
101
102// Compute the size necessary for the local stack frame.
103// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) +
104//      8 (parameter gtid) + 8 (parameter tid)
105    li 12, 88
106    add 12, 0, 12
107    neg 12, 12
108
109// We need to make sure that the stack frame stays aligned (to 16 bytes).
110    li 0, -16
111    and 12, 0, 12
112
113// Establish the local stack frame.
114    stdux 1, 1, 12
115
116#if OMPT_SUPPORT
117    std 30, -16(31)    # Save r30 to the stack
118    std 1, 0(8)
119    mr 30, 8
120#endif
121
122// Store gtid and tid to the stack because they're passed by reference to the microtask.
123    stw 4, -20(31)     # Save gtid to the stack
124    stw 5, -24(31)     # Save tid to the stack
125
126    mr 12, 6           # r12 = argc
127    mr 4, 7            # r4 = p_argv
128
129    cmpwi 0, 12, 1
130    blt 0, .Lcall      # if (argc < 1) goto .Lcall
131
132    ld 5, 0(4)         # r5 = p_argv[0]
133
134    cmpwi 0, 12, 2
135    blt 0, .Lcall      # if (argc < 2) goto .Lcall
136
137    ld 6, 8(4)         # r6 = p_argv[1]
138
139    cmpwi 0, 12, 3
140    blt 0, .Lcall      # if (argc < 3) goto .Lcall
141
142    ld 7, 16(4)        # r7 = p_argv[2]
143
144    cmpwi 0, 12, 4
145    blt 0, .Lcall      # if (argc < 4) goto .Lcall
146
147    ld 8, 24(4)        # r8 = p_argv[3]
148
149    cmpwi 0, 12, 5
150    blt 0, .Lcall      # if (argc < 5) goto .Lcall
151
152    ld 9, 32(4)        # r9 = p_argv[4]
153
154    cmpwi 0, 12, 6
155    blt 0, .Lcall      # if (argc < 6) goto .Lcall
156
157    ld 10, 40(4)       # r10 = p_argv[5]
158
159    cmpwi 0, 12, 7
160    blt 0, .Lcall      # if (argc < 7) goto .Lcall
161
162// There are more than 6 microtask parameters, so we need to store the
163// remainder to the stack.
164    addi 12, 12, -6    # argc -= 6
165    mtctr 12
166
167// These are set to 8 bytes before the first desired store address (we're using
168// pre-increment loads and stores in the loop below). The parameter save area
169// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64.
170    addi 4, 4, 40      # p_argv = p_argv + 5
171                       # (i.e. skip the 5 elements we already processed)
172    addi 12, 1, 104    # r12 = stack offset (112 - 8)
173
174.Lnext:
175    ldu 0, 8(4)
176    stdu 0, 8(12)
177    bdnz .Lnext
178
179.Lcall:
180    std 2, 40(1)     # Save the TOC pointer to the linkage area
181// Load the actual function address from the function descriptor.
182    ld 12, 0(3)      # Function address
183    ld 2, 8(3)       # TOC pointer
184    ld 11, 16(3)     # Environment pointer
185
186    addi 3, 31, -20  # r3 = &gtid
187    addi 4, 31, -24  # r4 = &tid
188
189    mtctr 12         # CTR = function address
190    bctrl            # Branch to CTR
191    ld 2, 40(1)      # Restore TOC pointer from linkage area
192
193#if OMPT_SUPPORT
194    li 3, 0
195    std 3, 0(30)
196#endif
197
198    li 3, 1
199
200#if OMPT_SUPPORT
201    ld 30, -16(31)   # Restore r30 from the saved value on the stack
202#endif
203
204    mr 1, 31
205    ld 31, -8(1)     # Restore r31 from the saved value on the stack
206    ld 0, 16(1)
207    mtlr 0           # Restore LR from the linkage area
208    blr              # Branch to LR
209
210#else  // KMP_ARCH_PPC_XCOFF
211
212    .globl  __kmp_invoke_microtask[DS]
213    .globl  .__kmp_invoke_microtask
214    .align  4
215    .csect __kmp_invoke_microtask[DS],2
216    .vbyte  4, .__kmp_invoke_microtask
217    .vbyte  4, TOC[TC0]
218    .vbyte  4, 0
219    .csect .text[PR],2
220    .machine "pwr7"
221.__kmp_invoke_microtask:
222
223
224// -- Begin __kmp_invoke_microtask
225// mark_begin;
226
227// We need to allocate a stack frame large enough to hold all of the parameters
228// on the stack for the microtask plus what this function needs. That's 24
229// bytes under the XCOFF ABI, plus max(32, 8*(2 + argc)) for
230// the parameters to the microtask (gtid, tid, argc elements of p_argv),
231// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31.
232// With OMP-T support, we need an additional 4 bytes to save r30 to hold
233// a copy of r8.
234// Stack offsets relative to stack pointer:
235//   r31: -4, r30: -8, gtid: -12, tid: -16
236
237    mflr 0
238    stw 31, -4(1)      # Save r31 to the stack
239    stw 0, 8(1)        # Save LR to the linkage area
240
241// This is unusual because normally we'd set r31 equal to r1 after the stack
242// frame is established. In this case, however, we need to dynamically compute
243// the stack frame size, and so we keep a direct copy of r1 to access our
244// register save areas and restore the r1 value before returning.
245    mr 31, 1
246
247// Compute the size of the "argc" portion of the parameter save area.
248// The parameter save area is always at least 32 bytes long (i.e. 8 regs)
249// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
250// to allocate 4*6 bytes, not 4*argc.
251    li 0, 6
252    cmpwi 0, 6, 6
253    iselgt 0, 6, 0     # r0 = (argc > 6)? argc : 6
254    slwi 0, 0, 2       # r0 = 4 * max(argc, 6)
255
256// Compute the size necessary for the local stack frame.
257// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) +
258//      4 (parameter gtid) + 4 (parameter tid)
259    li 12, 56
260    add 12, 0, 12
261    neg 12, 12
262
263// We need to make sure that the stack frame stays aligned (to 16 bytes).
264    li 0, -16
265    and 12, 0, 12
266
267// Establish the local stack frame.
268    stwux 1, 1, 12
269
270#if OMPT_SUPPORT
271    stw 30, -8(31)     # Save r30 to the stack
272    stw 1, 0(8)
273    mr 30, 8
274#endif
275
276// Store gtid and tid to the stack because they're passed by reference to the microtask.
277    stw 4, -12(31)     # Save gtid to the stack
278    stw 5, -16(31)     # Save tid to the stack
279
280    mr 12, 6           # r12 = argc
281    mr 4, 7            # r4 = p_argv
282
283    cmpwi 0, 12, 1
284    blt 0, .Lcall      # if (argc < 1) goto .Lcall
285
286    lwz 5, 0(4)        # r5 = p_argv[0]
287
288    cmpwi 0, 12, 2
289    blt 0, .Lcall      # if (argc < 2) goto .Lcall
290
291    lwz 6, 4(4)        # r6 = p_argv[1]
292
293    cmpwi 0, 12, 3
294    blt 0, .Lcall      # if (argc < 3) goto .Lcall
295
296    lwz 7, 8(4)        # r7 = p_argv[2]
297
298    cmpwi 0, 12, 4
299    blt 0, .Lcall      # if (argc < 4) goto .Lcall
300
301    lwz 8, 12(4)       # r8 = p_argv[3]
302
303    cmpwi 0, 12, 5
304    blt 0, .Lcall      # if (argc < 5) goto .Lcall
305
306    lwz 9, 16(4)       # r9 = p_argv[4]
307
308    cmpwi 0, 12, 6
309    blt 0, .Lcall      # if (argc < 6) goto .Lcall
310
311    lwz 10, 20(4)      # r10 = p_argv[5]
312
313    cmpwi 0, 12, 7
314    blt 0, .Lcall      # if (argc < 7) goto .Lcall
315
316// There are more than 6 microtask parameters, so we need to store the
317// remainder to the stack.
318    addi 12, 12, -6    # argc -= 6
319    mtctr 12
320
321// These are set to 4 bytes before the first desired store address (we're using
322// pre-increment loads and stores in the loop below). The parameter save area
323// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF.
324    addi 4, 4, 20      # p_argv = p_argv + 5
325                       # (i.e. skip the 5 elements we already processed)
326    addi 12, 1, 52     # r12 = stack offset (56 - 4)
327
328.Lnext:
329    lwzu 0, 4(4)
330    stwu 0, 4(12)
331    bdnz .Lnext
332
333.Lcall:
334    stw 2, 20(1)     # Save the TOC pointer to the linkage area
335// Load the actual function address from the function descriptor.
336    lwz 12, 0(3)     # Function address
337    lwz 2, 4(3)      # TOC pointer
338    lwz 11, 8(3)     # Environment pointer
339
340    addi 3, 31, -12  # r3 = &gtid
341    addi 4, 31, -16  # r4 = &tid
342
343    mtctr 12         # CTR = function address
344    bctrl            # Branch to CTR
345    lwz 2, 20(1)     # Restore TOC pointer from linkage area
346
347#if OMPT_SUPPORT
348    li 3, 0
349    stw 3, 0(30)
350#endif
351
352    li 3, 1
353
354#if OMPT_SUPPORT
355    lwz 30, -8(31)   # Restore r30 from the saved value on the stack
356#endif
357
358    mr 1, 31
359    lwz 31, -4(1)    # Restore r31 from the saved value on the stack
360    lwz 0, 8(1)
361    mtlr 0           # Restore LR from the linkage area
362    blr              # Branch to LR
363
364#endif // KMP_ARCH_PPC64_XCOFF
365
366.Lfunc_end0:
367    .vbyte  4, 0x00000000           # Traceback table begin
368    .byte   0x00                    # Version = 0
369    .byte   0x09                    # Language = CPlusPlus
370    .byte   0x20                    # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue
371                                    # +HasTraceBackTableOffset, -IsInternalProcedure
372                                    # -HasControlledStorage, -IsTOCless
373                                    # -IsFloatingPointPresent
374                                    # -IsFloatingPointOperationLogOrAbortEnabled
375    .byte   0x61                    # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed
376                                    # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved
377    .byte   0x80                    # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0
378#if OMPT_SUPPORT
379    .byte   0x02                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2
380    .byte   0x06                    # NumberOfFixedParms = 6
381#else
382    .byte   0x01                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1
383    .byte   0x05                    # NumberOfFixedParms = 5
384#endif
385    .byte   0x01                    # NumberOfFPParms = 0, +HasParmsOnStack
386    .vbyte  4, 0x00000000           # Parameter type = i, i, i, i, i
387    .vbyte  4, .Lfunc_end0-.__kmp_invoke_microtask # Function size
388    .vbyte  2, 0x0016               # Function name len = 22
389    .byte   "__kmp_invoke_microtask" # Function Name
390    .byte   0x1f                    # AllocaRegister = 31
391                                    # -- End function
392
393// -- End  __kmp_invoke_microtask
394
395// Support for unnamed common blocks.
396
397    .comm .gomp_critical_user_, 32, 3
398#if KMP_ARCH_PPC64_XCOFF
399    .csect __kmp_unnamed_critical_addr[RW],3
400#else
401    .csect __kmp_unnamed_critical_addr[RW],2
402#endif
403    .globl __kmp_unnamed_critical_addr[RW]
404    .ptr .gomp_critical_user_
405
406// -- End unnamed common block
407
408    .toc
409
410#endif // KMP_OS_AIX
411