1/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2
3   Contributed by Mentor Embedded.
4
5   This file is part of the GNU Offloading and Multi Processing Library
6   (libgomp).
7
8   Libgomp is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3, or (at your option)
11   any later version.
12
13   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16   more details.
17
18   Under Section 7 of GPL version 3, you are granted additional
19   permissions described in the GCC Runtime Library Exception, version
20   3.1, as published by the Free Software Foundation.
21
22   You should have received a copy of the GNU General Public License and
23   a copy of the GCC Runtime Library Exception along with this program;
24   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25   <http://www.gnu.org/licenses/>.  */
26
/* PTX source text defining `abort' and `_gfortran_abort'.

   The module header selects PTX ISA version 3.1, target sm_30 and 64-bit
   addressing.  Each of the two functions is first declared and then
   defined as a parameterless `.visible .func' whose body is a `trap'
   instruction followed by `ret'.

   The macro expands to a sequence of adjacent string literals, one PTX
   line per literal.  The final literal carries no line continuation, so
   the definition ends on that line and cannot absorb whatever is written
   on the line after it.  */
#define ABORT_PTX				\
  ".version 3.1\n"				\
  ".target sm_30\n"				\
  ".address_size 64\n"				\
  ".visible .func abort;\n"			\
  ".visible .func abort\n"			\
  "{\n"						\
  "trap;\n"					\
  "ret;\n"					\
  "}\n"						\
  ".visible .func _gfortran_abort;\n"		\
  ".visible .func _gfortran_abort\n"		\
  "{\n"						\
  "trap;\n"					\
  "ret;\n"					\
  "}\n"
43
44/* Generated with:
45
46   $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
47*/
/* PTX source text for `acc_on_device' and its by-reference wrapper
   `acc_on_device_h_'.

   acc_on_device (u32 d) returns 1 when d is 4; for any other d it
   returns the negated result of `set.u32.eq.u32 d, 5'.
   NOTE(review): 4 and 5 are presumably the numeric values of two
   OpenACC device-type enumerators -- confirm against openacc.h.

   acc_on_device_h_ (u64 p) loads a u32 from the address p and returns
   whatever acc_on_device yields for that value.

   The expansion is a run of adjacent string literals, one PTX line per
   literal; the text inside the literals must stay byte-for-byte as
   emitted by the compiler.  */
#define ACC_ON_DEVICE_PTX \
  /* Module header.  */ \
  "        .version        3.1\n" \
  "        .target sm_30\n" \
  "        .address_size 64\n" \
  /* acc_on_device: declaration, then definition.  */ \
  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
  "{\n" \
  "        .reg.u32 %ar1;\n" \
  ".reg.u32 %retval;\n" \
  "        .reg.u64 %hr10;\n" \
  "        .reg.u32 %r24;\n" \
  "        .reg.u32 %r25;\n" \
  "        .reg.pred %r27;\n" \
  "        .reg.u32 %r30;\n" \
  "        ld.param.u32 %ar1, [%in_ar1];\n" \
  "                mov.u32 %r24, %ar1;\n" \
  /* %r27 = (d != 4); %r25 = -(d == 5 ? all-ones : 0).  */ \
  "                setp.ne.u32 %r27,%r24,4;\n" \
  "                set.u32.eq.u32 %r30,%r24,5;\n" \
  "                neg.s32 %r25, %r30;\n" \
  /* When d == 4 the branch is not taken and the result is forced to 1.  */ \
  "        @%r27   bra     $L3;\n" \
  "                mov.u32 %r25, 1;\n" \
  "$L3:\n" \
  "                mov.u32 %retval, %r25;\n" \
  "        st.param.u32    [%out_retval], %retval;\n" \
  "        ret;\n" \
  "        }\n" \
  /* acc_on_device_h_: declaration, then definition.  */ \
  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
  "{\n" \
  "        .reg.u64 %ar1;\n" \
  ".reg.u32 %retval;\n" \
  "        .reg.u64 %hr10;\n" \
  "        .reg.u64 %r25;\n" \
  "        .reg.u32 %r26;\n" \
  "        .reg.u32 %r27;\n" \
  "        ld.param.u64 %ar1, [%in_ar1];\n" \
  "                mov.u64 %r25, %ar1;\n" \
  /* Dereference the pointer argument, then forward the value.  */ \
  "                ld.u32  %r26, [%r25];\n" \
  "        {\n" \
  "                .param.u32 %retval_in;\n" \
  "        {\n" \
  "                .param.u32 %out_arg0;\n" \
  "                st.param.u32 [%out_arg0], %r26;\n" \
  "                call (%retval_in), acc_on_device, (%out_arg0);\n" \
  "        }\n" \
  "                ld.param.u32    %r27, [%retval_in];\n" \
  "}\n" \
  "                mov.u32 %retval, %r27;\n" \
  "        st.param.u32    [%out_retval], %retval;\n" \
  "        ret;\n" \
  "        }"
99
/* PTX source text for the GOACC_* thread-geometry helper functions.

   The expansion is a run of adjacent string literals, one PTX line per
   literal.  The module header selects PTX ISA version 3.1, target sm_30
   and 64-bit addressing.  It forward-declares six `.visible' functions
   and declares `abort' as `.extern', so `abort' has to be provided by a
   different module (presumably the one in ABORT_PTX -- confirm against
   the code that loads these strings).

   Functions taking one u32 argument:

     GOACC_tid     reads %tid.x    / %tid.y    / %tid.z
     GOACC_ntid    reads %ntid.x   / %ntid.y   / %ntid.z
     GOACC_ctaid   reads %ctaid.x  / %ctaid.y  / %ctaid.z
     GOACC_nctaid  reads %nctaid.x / %nctaid.y / %nctaid.z

   In each of them argument 0 selects the .x component, 1 selects .y and
   2 selects .z; any other value branches to a block that calls `abort'.
   The text after that call falls through to the common return sequence,
   which copies %r22 although %r22 is not written on that path.
   NOTE(review): this relies on `abort' not returning -- confirm.

   Functions taking no argument:

     GOACC_get_num_threads  returns GOACC_ntid (0) * GOACC_nctaid (0),
                            computed with mul.lo.u32.
     GOACC_get_thread_num   returns GOACC_ntid (0) * GOACC_ctaid (0)
                            + GOACC_tid (0), computed with mul.lo.u32
                            and add.u32.

   Every function declares `.reg .u64 %hr10' without referencing it, and
   the one-argument functions store the argument to the 4-byte local
   `%frame' and immediately load it back.  The text looks like compiler
   output and is best regenerated rather than edited by hand
   (NOTE(review): the generating command is not recorded here).  */
 #define GOACC_INTERNAL_PTX						\
  ".version 3.1\n" \
  ".target sm_30\n" \
  ".address_size 64\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
  ".extern .func abort;\n" \
  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
  "{\n" \
  ".reg .u32 %ar1;\n" \
  ".reg .u32 %retval;\n" \
  ".reg .u64 %hr10;\n" \
  ".reg .u32 %r22;\n" \
  ".reg .u32 %r23;\n" \
  ".reg .u32 %r24;\n" \
  ".reg .u32 %r25;\n" \
  ".reg .u32 %r26;\n" \
  ".reg .u32 %r27;\n" \
  ".reg .u32 %r28;\n" \
  ".reg .u32 %r29;\n" \
  ".reg .pred %r30;\n" \
  ".reg .u32 %r31;\n" \
  ".reg .pred %r32;\n" \
  ".reg .u32 %r33;\n" \
  ".reg .pred %r34;\n" \
  ".local .align 8 .b8 %frame[4];\n" \
  "ld.param.u32 %ar1,[%in_ar1];\n" \
  "mov.u32 %r27,%ar1;\n" \
  "st.local.u32 [%frame],%r27;\n" \
  "ld.local.u32 %r28,[%frame];\n" \
  "mov.u32 %r29,1;\n"							\
  "setp.eq.u32 %r30,%r28,%r29;\n"					\
  "@%r30 bra $L4;\n"							\
  "mov.u32 %r31,2;\n"							\
  "setp.eq.u32 %r32,%r28,%r31;\n"					\
  "@%r32 bra $L5;\n"							\
  "mov.u32 %r33,0;\n"							\
  "setp.eq.u32 %r34,%r28,%r33;\n"					\
  "@!%r34 bra $L8;\n"							\
  "mov.u32 %r23,%tid.x;\n"						\
  "mov.u32 %r22,%r23;\n"						\
  "bra $L7;\n"								\
  "$L4:\n"								\
  "mov.u32 %r24,%tid.y;\n"						\
  "mov.u32 %r22,%r24;\n"						\
  "bra $L7;\n"								\
  "$L5:\n"								\
  "mov.u32 %r25,%tid.z;\n"						\
  "mov.u32 %r22,%r25;\n"						\
  "bra $L7;\n"								\
  "$L8:\n"								\
  "{\n"									\
  "{\n"									\
  "call abort;\n"							\
  "}\n"									\
  "}\n"									\
  "$L7:\n"								\
  "mov.u32 %r26,%r22;\n"						\
  "mov.u32 %retval,%r26;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"									\
  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
  "{\n"									\
  ".reg .u32 %ar1;\n"							\
  ".reg .u32 %retval;\n"						\
  ".reg .u64 %hr10;\n"							\
  ".reg .u32 %r22;\n"							\
  ".reg .u32 %r23;\n"							\
  ".reg .u32 %r24;\n"							\
  ".reg .u32 %r25;\n"							\
  ".reg .u32 %r26;\n"							\
  ".reg .u32 %r27;\n"							\
  ".reg .u32 %r28;\n"							\
  ".reg .u32 %r29;\n"							\
  ".reg .pred %r30;\n"							\
  ".reg .u32 %r31;\n"							\
  ".reg .pred %r32;\n"							\
  ".reg .u32 %r33;\n"							\
  ".reg .pred %r34;\n"							\
  ".local .align 8 .b8 %frame[4];\n"					\
  "ld.param.u32 %ar1,[%in_ar1];\n"					\
  "mov.u32 %r27,%ar1;\n"						\
  "st.local.u32 [%frame],%r27;\n"					\
  "ld.local.u32 %r28,[%frame];\n"					\
  "mov.u32 %r29,1;\n"							\
  "setp.eq.u32 %r30,%r28,%r29;\n"					\
  "@%r30 bra $L11;\n"							\
  "mov.u32 %r31,2;\n"							\
  "setp.eq.u32 %r32,%r28,%r31;\n"					\
  "@%r32 bra $L12;\n"							\
  "mov.u32 %r33,0;\n"							\
  "setp.eq.u32 %r34,%r28,%r33;\n"					\
  "@!%r34 bra $L15;\n"							\
  "mov.u32 %r23,%ntid.x;\n"						\
  "mov.u32 %r22,%r23;\n"						\
  "bra $L14;\n"								\
  "$L11:\n"								\
  "mov.u32 %r24,%ntid.y;\n"						\
  "mov.u32 %r22,%r24;\n"						\
  "bra $L14;\n"								\
  "$L12:\n"								\
  "mov.u32 %r25,%ntid.z;\n"						\
  "mov.u32 %r22,%r25;\n"						\
  "bra $L14;\n"								\
  "$L15:\n"								\
  "{\n"									\
  "{\n"									\
  "call abort;\n"							\
  "}\n"									\
  "}\n"									\
  "$L14:\n"								\
  "mov.u32 %r26,%r22;\n"						\
  "mov.u32 %retval,%r26;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"									\
  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
  "{\n"									\
  ".reg .u32 %ar1;\n"							\
  ".reg .u32 %retval;\n"						\
  ".reg .u64 %hr10;\n"							\
  ".reg .u32 %r22;\n"							\
  ".reg .u32 %r23;\n"							\
  ".reg .u32 %r24;\n"							\
  ".reg .u32 %r25;\n"							\
  ".reg .u32 %r26;\n"							\
  ".reg .u32 %r27;\n"							\
  ".reg .u32 %r28;\n"							\
  ".reg .u32 %r29;\n"							\
  ".reg .pred %r30;\n"							\
  ".reg .u32 %r31;\n"							\
  ".reg .pred %r32;\n"							\
  ".reg .u32 %r33;\n"							\
  ".reg .pred %r34;\n"							\
  ".local .align 8 .b8 %frame[4];\n"					\
  "ld.param.u32 %ar1,[%in_ar1];\n"					\
  "mov.u32 %r27,%ar1;\n"						\
  "st.local.u32 [%frame],%r27;\n"					\
  "ld.local.u32 %r28,[%frame];\n"					\
  "mov.u32 %r29,1;\n"							\
  "setp.eq.u32 %r30,%r28,%r29;\n"					\
  "@%r30 bra $L18;\n"							\
  "mov.u32 %r31,2;\n"							\
  "setp.eq.u32 %r32,%r28,%r31;\n"					\
  "@%r32 bra $L19;\n"							\
  "mov.u32 %r33,0;\n"							\
  "setp.eq.u32 %r34,%r28,%r33;\n"					\
  "@!%r34 bra $L22;\n"							\
  "mov.u32 %r23,%ctaid.x;\n"						\
  "mov.u32 %r22,%r23;\n"						\
  "bra $L21;\n"								\
  "$L18:\n"								\
  "mov.u32 %r24,%ctaid.y;\n"						\
  "mov.u32 %r22,%r24;\n"						\
  "bra $L21;\n"								\
  "$L19:\n"								\
  "mov.u32 %r25,%ctaid.z;\n"						\
  "mov.u32 %r22,%r25;\n"						\
  "bra $L21;\n"								\
  "$L22:\n"								\
  "{\n"									\
  "{\n"									\
  "call abort;\n"							\
  "}\n"									\
  "}\n"									\
  "$L21:\n"								\
  "mov.u32 %r26,%r22;\n"						\
  "mov.u32 %retval,%r26;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"									\
  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
  "{\n"									\
  ".reg .u32 %ar1;\n"							\
  ".reg .u32 %retval;\n"						\
  ".reg .u64 %hr10;\n"							\
  ".reg .u32 %r22;\n"							\
  ".reg .u32 %r23;\n"							\
  ".reg .u32 %r24;\n"							\
  ".reg .u32 %r25;\n"							\
  ".reg .u32 %r26;\n"							\
  ".reg .u32 %r27;\n"							\
  ".reg .u32 %r28;\n"							\
  ".reg .u32 %r29;\n"							\
  ".reg .pred %r30;\n"							\
  ".reg .u32 %r31;\n"							\
  ".reg .pred %r32;\n"							\
  ".reg .u32 %r33;\n"							\
  ".reg .pred %r34;\n"							\
  ".local .align 8 .b8 %frame[4];\n"					\
  "ld.param.u32 %ar1,[%in_ar1];\n"					\
  "mov.u32 %r27,%ar1;\n"						\
  "st.local.u32 [%frame],%r27;\n"					\
  "ld.local.u32 %r28,[%frame];\n"					\
  "mov.u32 %r29,1;\n"							\
  "setp.eq.u32 %r30,%r28,%r29;\n"					\
  "@%r30 bra $L25;\n"							\
  "mov.u32 %r31,2;\n"							\
  "setp.eq.u32 %r32,%r28,%r31;\n"					\
  "@%r32 bra $L26;\n"							\
  "mov.u32 %r33,0;\n"							\
  "setp.eq.u32 %r34,%r28,%r33;\n"					\
  "@!%r34 bra $L29;\n"							\
  "mov.u32 %r23,%nctaid.x;\n"						\
  "mov.u32 %r22,%r23;\n"						\
  "bra $L28;\n"								\
  "$L25:\n"								\
  "mov.u32 %r24,%nctaid.y;\n"						\
  "mov.u32 %r22,%r24;\n"						\
  "bra $L28;\n"								\
  "$L26:\n"								\
  "mov.u32 %r25,%nctaid.z;\n"						\
  "mov.u32 %r22,%r25;\n"						\
  "bra $L28;\n"								\
  "$L29:\n"								\
  "{\n"									\
  "{\n"									\
  "call abort;\n"							\
  "}\n"									\
  "}\n"									\
  "$L28:\n"								\
  "mov.u32 %r26,%r22;\n"						\
  "mov.u32 %retval,%r26;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"									\
  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"	\
  "{\n"									\
  ".reg .u32 %retval;\n"						\
  ".reg .u64 %hr10;\n"							\
  ".reg .u32 %r22;\n"							\
  ".reg .u32 %r23;\n"							\
  ".reg .u32 %r24;\n"							\
  ".reg .u32 %r25;\n"							\
  ".reg .u32 %r26;\n"							\
  ".reg .u32 %r27;\n"							\
  ".reg .u32 %r28;\n"							\
  ".reg .u32 %r29;\n"							\
  "mov.u32 %r26,0;\n"							\
  "{\n"									\
  ".param .u32 %retval_in;\n"						\
  "{\n"									\
  ".param .u32 %out_arg0;\n"						\
  "st.param.u32 [%out_arg0],%r26;\n"					\
  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
  "}\n"									\
  "ld.param.u32 %r27,[%retval_in];\n"					\
  "}\n"									\
  "mov.u32 %r22,%r27;\n"						\
  "mov.u32 %r28,0;\n"							\
  "{\n"									\
  ".param .u32 %retval_in;\n"						\
  "{\n"									\
  ".param .u32 %out_arg0;\n"						\
  "st.param.u32 [%out_arg0],%r28;\n"					\
  "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"			\
  "}\n"									\
  "ld.param.u32 %r29,[%retval_in];\n"					\
  "}\n"									\
  "mov.u32 %r23,%r29;\n"						\
  "mul.lo.u32 %r24,%r22,%r23;\n"					\
  "mov.u32 %r25,%r24;\n"						\
  "mov.u32 %retval,%r25;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"									\
  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"	\
  "{\n"									\
  ".reg .u32 %retval;\n"						\
  ".reg .u64 %hr10;\n"							\
  ".reg .u32 %r22;\n"							\
  ".reg .u32 %r23;\n"							\
  ".reg .u32 %r24;\n"							\
  ".reg .u32 %r25;\n"							\
  ".reg .u32 %r26;\n"							\
  ".reg .u32 %r27;\n"							\
  ".reg .u32 %r28;\n"							\
  ".reg .u32 %r29;\n"							\
  ".reg .u32 %r30;\n"							\
  ".reg .u32 %r31;\n"							\
  ".reg .u32 %r32;\n"							\
  ".reg .u32 %r33;\n"							\
  "mov.u32 %r28,0;\n"							\
  "{\n"									\
  ".param .u32 %retval_in;\n"						\
  "{\n"									\
  ".param .u32 %out_arg0;\n"						\
  "st.param.u32 [%out_arg0],%r28;\n"					\
  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
  "}\n"									\
  "ld.param.u32 %r29,[%retval_in];\n"					\
  "}\n"									\
  "mov.u32 %r22,%r29;\n"						\
  "mov.u32 %r30,0;\n"							\
  "{\n"									\
  ".param .u32 %retval_in;\n"						\
  "{\n"									\
  ".param .u32 %out_arg0;\n"						\
  "st.param.u32 [%out_arg0],%r30;\n"					\
  "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"			\
  "}\n"									\
  "ld.param.u32 %r31,[%retval_in];\n"					\
  "}\n"									\
  "mov.u32 %r23,%r31;\n"						\
  "mul.lo.u32 %r24,%r22,%r23;\n"					\
  "mov.u32 %r32,0;\n"							\
  "{\n"									\
  ".param .u32 %retval_in;\n"						\
  "{\n"									\
  ".param .u32 %out_arg0;\n"						\
  "st.param.u32 [%out_arg0],%r32;\n"					\
  "call (%retval_in),GOACC_tid,(%out_arg0);\n"				\
  "}\n"									\
  "ld.param.u32 %r33,[%retval_in];\n"					\
  "}\n"									\
  "mov.u32 %r25,%r33;\n"						\
  "add.u32 %r26,%r24,%r25;\n"						\
  "mov.u32 %r27,%r26;\n"						\
  "mov.u32 %retval,%r27;\n"						\
  "st.param.u32 [%out_retval],%retval;\n"				\
  "ret;\n"								\
  "}\n"
427