patch-r267981-llvm-r211435-fix-ppc-fctiduz.diff revision 268065
1Pull in r211627 from upstream llvm trunk (by Bill Schmidt):
2
3  [PPC64] Fix PR20071 (fctiduz generated for targets lacking that
4  instruction)
5
6  PR20071 identifies a problem in PowerPC's fast-isel implementation
7  for floating-point conversion to integer.  The fctiduz instruction
8  was added in Power ISA 2.06 (i.e., Power7 and later).  However, this
9  instruction is being generated regardless of which 64-bit PowerPC
10  target is selected.
11
12  The intent is for fast-isel to punt to DAG selection when this
13  instruction is not available.  This patch implements that change.
14  For testing purposes, the existing fast-isel-conversion.ll test adds
15  a RUN line for -mcpu=970 and tests for the expected code generation.
16  Additionally, the existing test fast-isel-conversion-p5.ll was found
17  to be incorrectly expecting the unavailable instruction to be
18  generated.  I've removed these test variants since we have adequate
19  coverage in fast-isel-conversion.ll.
20
21This is needed to compile clang with debug+asserts on older powerpc64
22and ppc970 targets.
23
24Introduced here: http://svnweb.freebsd.org/changeset/base/267981
25
26Index: lib/Target/PowerPC/PPCFastISel.cpp
27===================================================================
28--- lib/Target/PowerPC/PPCFastISel.cpp	(revision 106)
29+++ lib/Target/PowerPC/PPCFastISel.cpp	(revision 107)
30@@ -1026,6 +1026,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I
31   if (DstVT != MVT::i32 && DstVT != MVT::i64)
32     return false;
33 
34+  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
35+  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT())
36+    return false;
37+
38   Value *Src = I->getOperand(0);
39   Type *SrcTy = Src->getType();
40   if (!isTypeLegal(SrcTy, SrcVT))
41Index: test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
42===================================================================
43--- test/CodeGen/PowerPC/fast-isel-conversion-p5.ll	(revision 106)
44+++ test/CodeGen/PowerPC/fast-isel-conversion-p5.ll	(revision 107)
45@@ -116,18 +116,6 @@ entry:
46   ret void
47 }
48 
49-define void @fptoui_float_i64(float %a) nounwind ssp {
50-entry:
51-; ELF64: fptoui_float_i64
52-  %b.addr = alloca i64, align 4
53-  %conv = fptoui float %a to i64
54-; ELF64: fctiduz
55-; ELF64: stfd
56-; ELF64: ld
57-  store i64 %conv, i64* %b.addr, align 4
58-  ret void
59-}
60-
61 define void @fptoui_double_i32(double %a) nounwind ssp {
62 entry:
63 ; ELF64: fptoui_double_i32
64@@ -140,14 +128,3 @@ entry:
65   ret void
66 }
67 
68-define void @fptoui_double_i64(double %a) nounwind ssp {
69-entry:
70-; ELF64: fptoui_double_i64
71-  %b.addr = alloca i64, align 8
72-  %conv = fptoui double %a to i64
73-; ELF64: fctiduz
74-; ELF64: stfd
75-; ELF64: ld
76-  store i64 %conv, i64* %b.addr, align 8
77-  ret void
78-}
79Index: test/CodeGen/PowerPC/fast-isel-conversion.ll
80===================================================================
81--- test/CodeGen/PowerPC/fast-isel-conversion.ll	(revision 106)
82+++ test/CodeGen/PowerPC/fast-isel-conversion.ll	(revision 107)
83@@ -1,15 +1,24 @@
84 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
85+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
86 
87+;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
88+;; to SelectionDAG in some cases.
89+
90 ; Test sitofp
91 
92 define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
93 entry:
94 ; ELF64: sitofp_single_i64
95+; PPC970: sitofp_single_i64
96   %b.addr = alloca float, align 4
97   %conv = sitofp i64 %a to float
98 ; ELF64: std
99 ; ELF64: lfd
100 ; ELF64: fcfids
101+; PPC970: std
102+; PPC970: lfd
103+; PPC970: fcfid
104+; PPC970: frsp
105   store float %conv, float* %b.addr, align 4
106   ret void
107 }
108@@ -17,11 +26,16 @@ entry:
109 define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
110 entry:
111 ; ELF64: sitofp_single_i32
112+; PPC970: sitofp_single_i32
113   %b.addr = alloca float, align 4
114   %conv = sitofp i32 %a to float
115 ; ELF64: std
116 ; ELF64: lfiwax
117 ; ELF64: fcfids
118+; PPC970: std
119+; PPC970: lfd
120+; PPC970: fcfid
121+; PPC970: frsp
122   store float %conv, float* %b.addr, align 4
123   ret void
124 }
125@@ -29,6 +43,7 @@ entry:
126 define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
127 entry:
128 ; ELF64: sitofp_single_i16
129+; PPC970: sitofp_single_i16
130   %b.addr = alloca float, align 4
131   %conv = sitofp i16 %a to float
132 ; ELF64: extsh
133@@ -35,6 +50,11 @@ entry:
134 ; ELF64: std
135 ; ELF64: lfd
136 ; ELF64: fcfids
137+; PPC970: extsh
138+; PPC970: std
139+; PPC970: lfd
140+; PPC970: fcfid
141+; PPC970: frsp
142   store float %conv, float* %b.addr, align 4
143   ret void
144 }
145@@ -42,6 +62,7 @@ entry:
146 define void @sitofp_single_i8(i8 %a) nounwind ssp {
147 entry:
148 ; ELF64: sitofp_single_i8
149+; PPC970: sitofp_single_i8
150   %b.addr = alloca float, align 4
151   %conv = sitofp i8 %a to float
152 ; ELF64: extsb
153@@ -48,6 +69,11 @@ entry:
154 ; ELF64: std
155 ; ELF64: lfd
156 ; ELF64: fcfids
157+; PPC970: extsb
158+; PPC970: std
159+; PPC970: lfd
160+; PPC970: fcfid
161+; PPC970: frsp
162   store float %conv, float* %b.addr, align 4
163   ret void
164 }
165@@ -55,11 +81,15 @@ entry:
166 define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
167 entry:
168 ; ELF64: sitofp_double_i32
169+; PPC970: sitofp_double_i32
170   %b.addr = alloca double, align 8
171   %conv = sitofp i32 %a to double
172 ; ELF64: std
173 ; ELF64: lfiwax
174 ; ELF64: fcfid
175+; PPC970: std
176+; PPC970: lfd
177+; PPC970: fcfid
178   store double %conv, double* %b.addr, align 8
179   ret void
180 }
181@@ -67,11 +97,15 @@ entry:
182 define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
183 entry:
184 ; ELF64: sitofp_double_i64
185+; PPC970: sitofp_double_i64
186   %b.addr = alloca double, align 8
187   %conv = sitofp i64 %a to double
188 ; ELF64: std
189 ; ELF64: lfd
190 ; ELF64: fcfid
191+; PPC970: std
192+; PPC970: lfd
193+; PPC970: fcfid
194   store double %conv, double* %b.addr, align 8
195   ret void
196 }
197@@ -79,6 +113,7 @@ entry:
198 define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
199 entry:
200 ; ELF64: sitofp_double_i16
201+; PPC970: sitofp_double_i16
202   %b.addr = alloca double, align 8
203   %conv = sitofp i16 %a to double
204 ; ELF64: extsh
205@@ -85,6 +120,10 @@ entry:
206 ; ELF64: std
207 ; ELF64: lfd
208 ; ELF64: fcfid
209+; PPC970: extsh
210+; PPC970: std
211+; PPC970: lfd
212+; PPC970: fcfid
213   store double %conv, double* %b.addr, align 8
214   ret void
215 }
216@@ -92,6 +131,7 @@ entry:
217 define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
218 entry:
219 ; ELF64: sitofp_double_i8
220+; PPC970: sitofp_double_i8
221   %b.addr = alloca double, align 8
222   %conv = sitofp i8 %a to double
223 ; ELF64: extsb
224@@ -98,6 +138,10 @@ entry:
225 ; ELF64: std
226 ; ELF64: lfd
227 ; ELF64: fcfid
228+; PPC970: extsb
229+; PPC970: std
230+; PPC970: lfd
231+; PPC970: fcfid
232   store double %conv, double* %b.addr, align 8
233   ret void
234 }
235@@ -107,11 +151,13 @@ entry:
236 define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
237 entry:
238 ; ELF64: uitofp_single_i64
239+; PPC970: uitofp_single_i64
240   %b.addr = alloca float, align 4
241   %conv = uitofp i64 %a to float
242 ; ELF64: std
243 ; ELF64: lfd
244 ; ELF64: fcfidus
245+; PPC970-NOT: fcfidus
246   store float %conv, float* %b.addr, align 4
247   ret void
248 }
249@@ -119,11 +165,14 @@ entry:
250 define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
251 entry:
252 ; ELF64: uitofp_single_i32
253+; PPC970: uitofp_single_i32
254   %b.addr = alloca float, align 4
255   %conv = uitofp i32 %a to float
256 ; ELF64: std
257 ; ELF64: lfiwzx
258 ; ELF64: fcfidus
259+; PPC970-NOT: lfiwzx
260+; PPC970-NOT: fcfidus
261   store float %conv, float* %b.addr, align 4
262   ret void
263 }
264@@ -131,6 +180,7 @@ entry:
265 define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
266 entry:
267 ; ELF64: uitofp_single_i16
268+; PPC970: uitofp_single_i16
269   %b.addr = alloca float, align 4
270   %conv = uitofp i16 %a to float
271 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
272@@ -137,6 +187,11 @@ entry:
273 ; ELF64: std
274 ; ELF64: lfd
275 ; ELF64: fcfidus
276+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
277+; PPC970: std
278+; PPC970: lfd
279+; PPC970: fcfid
280+; PPC970: frsp
281   store float %conv, float* %b.addr, align 4
282   ret void
283 }
284@@ -144,6 +199,7 @@ entry:
285 define void @uitofp_single_i8(i8 %a) nounwind ssp {
286 entry:
287 ; ELF64: uitofp_single_i8
288+; PPC970: uitofp_single_i8
289   %b.addr = alloca float, align 4
290   %conv = uitofp i8 %a to float
291 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
292@@ -150,6 +206,11 @@ entry:
293 ; ELF64: std
294 ; ELF64: lfd
295 ; ELF64: fcfidus
296+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
297+; PPC970: std
298+; PPC970: lfd
299+; PPC970: fcfid
300+; PPC970: frsp
301   store float %conv, float* %b.addr, align 4
302   ret void
303 }
304@@ -157,11 +218,13 @@ entry:
305 define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
306 entry:
307 ; ELF64: uitofp_double_i64
308+; PPC970: uitofp_double_i64
309   %b.addr = alloca double, align 8
310   %conv = uitofp i64 %a to double
311 ; ELF64: std
312 ; ELF64: lfd
313 ; ELF64: fcfidu
314+; PPC970-NOT: fcfidu
315   store double %conv, double* %b.addr, align 8
316   ret void
317 }
318@@ -169,11 +232,14 @@ entry:
319 define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
320 entry:
321 ; ELF64: uitofp_double_i32
322+; PPC970: uitofp_double_i32
323   %b.addr = alloca double, align 8
324   %conv = uitofp i32 %a to double
325 ; ELF64: std
326 ; ELF64: lfiwzx
327 ; ELF64: fcfidu
328+; PPC970-NOT: lfiwzx
329+; PPC970-NOT: fcfidu
330   store double %conv, double* %b.addr, align 8
331   ret void
332 }
333@@ -181,6 +247,7 @@ entry:
334 define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
335 entry:
336 ; ELF64: uitofp_double_i16
337+; PPC970: uitofp_double_i16
338   %b.addr = alloca double, align 8
339   %conv = uitofp i16 %a to double
340 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
341@@ -187,6 +254,10 @@ entry:
342 ; ELF64: std
343 ; ELF64: lfd
344 ; ELF64: fcfidu
345+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
346+; PPC970: std
347+; PPC970: lfd
348+; PPC970: fcfid
349   store double %conv, double* %b.addr, align 8
350   ret void
351 }
352@@ -194,6 +265,7 @@ entry:
353 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
354 entry:
355 ; ELF64: uitofp_double_i8
356+; PPC970: uitofp_double_i8
357   %b.addr = alloca double, align 8
358   %conv = uitofp i8 %a to double
359 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
360@@ -200,6 +272,10 @@ entry:
361 ; ELF64: std
362 ; ELF64: lfd
363 ; ELF64: fcfidu
364+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
365+; PPC970: std
366+; PPC970: lfd
367+; PPC970: fcfid
368   store double %conv, double* %b.addr, align 8
369   ret void
370 }
371@@ -209,11 +285,15 @@ entry:
372 define void @fptosi_float_i32(float %a) nounwind ssp {
373 entry:
374 ; ELF64: fptosi_float_i32
375+; PPC970: fptosi_float_i32
376   %b.addr = alloca i32, align 4
377   %conv = fptosi float %a to i32
378 ; ELF64: fctiwz
379 ; ELF64: stfd
380 ; ELF64: lwa
381+; PPC970: fctiwz
382+; PPC970: stfd
383+; PPC970: lwa
384   store i32 %conv, i32* %b.addr, align 4
385   ret void
386 }
387@@ -221,11 +301,15 @@ entry:
388 define void @fptosi_float_i64(float %a) nounwind ssp {
389 entry:
390 ; ELF64: fptosi_float_i64
391+; PPC970: fptosi_float_i64
392   %b.addr = alloca i64, align 4
393   %conv = fptosi float %a to i64
394 ; ELF64: fctidz
395 ; ELF64: stfd
396 ; ELF64: ld
397+; PPC970: fctidz
398+; PPC970: stfd
399+; PPC970: ld
400   store i64 %conv, i64* %b.addr, align 4
401   ret void
402 }
403@@ -233,11 +317,15 @@ entry:
404 define void @fptosi_double_i32(double %a) nounwind ssp {
405 entry:
406 ; ELF64: fptosi_double_i32
407+; PPC970: fptosi_double_i32
408   %b.addr = alloca i32, align 8
409   %conv = fptosi double %a to i32
410 ; ELF64: fctiwz
411 ; ELF64: stfd
412 ; ELF64: lwa
413+; PPC970: fctiwz
414+; PPC970: stfd
415+; PPC970: lwa
416   store i32 %conv, i32* %b.addr, align 8
417   ret void
418 }
419@@ -245,11 +333,15 @@ entry:
420 define void @fptosi_double_i64(double %a) nounwind ssp {
421 entry:
422 ; ELF64: fptosi_double_i64
423+; PPC970: fptosi_double_i64
424   %b.addr = alloca i64, align 8
425   %conv = fptosi double %a to i64
426 ; ELF64: fctidz
427 ; ELF64: stfd
428 ; ELF64: ld
429+; PPC970: fctidz
430+; PPC970: stfd
431+; PPC970: ld
432   store i64 %conv, i64* %b.addr, align 8
433   ret void
434 }
435@@ -259,11 +351,15 @@ entry:
436 define void @fptoui_float_i32(float %a) nounwind ssp {
437 entry:
438 ; ELF64: fptoui_float_i32
439+; PPC970: fptoui_float_i32
440   %b.addr = alloca i32, align 4
441   %conv = fptoui float %a to i32
442 ; ELF64: fctiwuz
443 ; ELF64: stfd
444 ; ELF64: lwz
445+; PPC970: fctidz
446+; PPC970: stfd
447+; PPC970: lwz
448   store i32 %conv, i32* %b.addr, align 4
449   ret void
450 }
451@@ -271,11 +367,13 @@ entry:
452 define void @fptoui_float_i64(float %a) nounwind ssp {
453 entry:
454 ; ELF64: fptoui_float_i64
455+; PPC970: fptoui_float_i64
456   %b.addr = alloca i64, align 4
457   %conv = fptoui float %a to i64
458 ; ELF64: fctiduz
459 ; ELF64: stfd
460 ; ELF64: ld
461+; PPC970-NOT: fctiduz
462   store i64 %conv, i64* %b.addr, align 4
463   ret void
464 }
465@@ -283,11 +381,15 @@ entry:
466 define void @fptoui_double_i32(double %a) nounwind ssp {
467 entry:
468 ; ELF64: fptoui_double_i32
469+; PPC970: fptoui_double_i32
470   %b.addr = alloca i32, align 8
471   %conv = fptoui double %a to i32
472 ; ELF64: fctiwuz
473 ; ELF64: stfd
474 ; ELF64: lwz
475+; PPC970: fctidz
476+; PPC970: stfd
477+; PPC970: lwz
478   store i32 %conv, i32* %b.addr, align 8
479   ret void
480 }
481@@ -295,11 +397,13 @@ entry:
482 define void @fptoui_double_i64(double %a) nounwind ssp {
483 entry:
484 ; ELF64: fptoui_double_i64
485+; PPC970: fptoui_double_i64
486   %b.addr = alloca i64, align 8
487   %conv = fptoui double %a to i64
488 ; ELF64: fctiduz
489 ; ELF64: stfd
490 ; ELF64: ld
491+; PPC970-NOT: fctiduz
492   store i64 %conv, i64* %b.addr, align 8
493   ret void
494 }
495