patch-r267981-llvm-r211435-fix-ppc-fctiduz.diff revision 268065
1Pull in r211627 from upstream llvm trunk (by Bill Schmidt): 2 3 [PPC64] Fix PR20071 (fctiduz generated for targets lacking that 4 instruction) 5 6 PR20071 identifies a problem in PowerPC's fast-isel implementation 7 for floating-point conversion to integer. The fctiduz instruction 8 was added in Power ISA 2.06 (i.e., Power7 and later). However, this 9 instruction is being generated regardless of which 64-bit PowerPC 10 target is selected. 11 12 The intent is for fast-isel to punt to DAG selection when this 13 instruction is not available. This patch implements that change. 14 For testing purposes, the existing fast-isel-conversion.ll test adds 15 a RUN line for -mcpu=970 and tests for the expected code generation. 16 Additionally, the existing test fast-isel-conversion-p5.ll was found 17 to be incorrectly expecting the unavailable instruction to be 18 generated. I've removed these test variants since we have adequate 19 coverage in fast-isel-conversion.ll. 20 21This is needed to compile clang with debug+asserts on older powerpc64 22and ppc970 targets. 23 24Introduced here: http://svnweb.freebsd.org/changeset/base/267981 25 26Index: lib/Target/PowerPC/PPCFastISel.cpp 27=================================================================== 28--- lib/Target/PowerPC/PPCFastISel.cpp (revision 106) 29+++ lib/Target/PowerPC/PPCFastISel.cpp (revision 107) 30@@ -1026,6 +1026,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I 31 if (DstVT != MVT::i32 && DstVT != MVT::i64) 32 return false; 33 34+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. 35+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT()) 36+ return false; 37+ 38 Value *Src = I->getOperand(0); 39 Type *SrcTy = Src->getType(); 40 if (!isTypeLegal(SrcTy, SrcVT)) 41Index: test/CodeGen/PowerPC/fast-isel-conversion-p5.ll 42=================================================================== 43--- test/CodeGen/PowerPC/fast-isel-conversion-p5.ll (revision 106) 44+++ test/CodeGen/PowerPC/fast-isel-conversion-p5.ll (revision 107) 45@@ -116,18 +116,6 @@ entry: 46 ret void 47 } 48 49-define void @fptoui_float_i64(float %a) nounwind ssp { 50-entry: 51-; ELF64: fptoui_float_i64 52- %b.addr = alloca i64, align 4 53- %conv = fptoui float %a to i64 54-; ELF64: fctiduz 55-; ELF64: stfd 56-; ELF64: ld 57- store i64 %conv, i64* %b.addr, align 4 58- ret void 59-} 60- 61 define void @fptoui_double_i32(double %a) nounwind ssp { 62 entry: 63 ; ELF64: fptoui_double_i32 64@@ -140,14 +128,3 @@ entry: 65 ret void 66 } 67 68-define void @fptoui_double_i64(double %a) nounwind ssp { 69-entry: 70-; ELF64: fptoui_double_i64 71- %b.addr = alloca i64, align 8 72- %conv = fptoui double %a to i64 73-; ELF64: fctiduz 74-; ELF64: stfd 75-; ELF64: ld 76- store i64 %conv, i64* %b.addr, align 8 77- ret void 78-} 79Index: test/CodeGen/PowerPC/fast-isel-conversion.ll 80=================================================================== 81--- test/CodeGen/PowerPC/fast-isel-conversion.ll (revision 106) 82+++ test/CodeGen/PowerPC/fast-isel-conversion.ll (revision 107) 83@@ -1,15 +1,24 @@ 84 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 85+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970 86 87+;; Tests for 970 don't use -fast-isel-abort because we intentionally punt 88+;; to SelectionDAG in some cases. 89+ 90 ; Test sitofp 91 92 define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { 93 entry: 94 ; ELF64: sitofp_single_i64 95+; PPC970: sitofp_single_i64 96 %b.addr = alloca float, align 4 97 %conv = sitofp i64 %a to float 98 ; ELF64: std 99 ; ELF64: lfd 100 ; ELF64: fcfids 101+; PPC970: std 102+; PPC970: lfd 103+; PPC970: fcfid 104+; PPC970: frsp 105 store float %conv, float* %b.addr, align 4 106 ret void 107 } 108@@ -17,11 +26,16 @@ entry: 109 define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { 110 entry: 111 ; ELF64: sitofp_single_i32 112+; PPC970: sitofp_single_i32 113 %b.addr = alloca float, align 4 114 %conv = sitofp i32 %a to float 115 ; ELF64: std 116 ; ELF64: lfiwax 117 ; ELF64: fcfids 118+; PPC970: std 119+; PPC970: lfd 120+; PPC970: fcfid 121+; PPC970: frsp 122 store float %conv, float* %b.addr, align 4 123 ret void 124 } 125@@ -29,6 +43,7 @@ entry: 126 define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { 127 entry: 128 ; ELF64: sitofp_single_i16 129+; PPC970: sitofp_single_i16 130 %b.addr = alloca float, align 4 131 %conv = sitofp i16 %a to float 132 ; ELF64: extsh 133@@ -35,6 +50,11 @@ entry: 134 ; ELF64: std 135 ; ELF64: lfd 136 ; ELF64: fcfids 137+; PPC970: extsh 138+; PPC970: std 139+; PPC970: lfd 140+; PPC970: fcfid 141+; PPC970: frsp 142 store float %conv, float* %b.addr, align 4 143 ret void 144 } 145@@ -42,6 +62,7 @@ entry: 146 define void @sitofp_single_i8(i8 %a) nounwind ssp { 147 entry: 148 ; ELF64: sitofp_single_i8 149+; PPC970: sitofp_single_i8 150 %b.addr = alloca float, align 4 151 %conv = sitofp i8 %a to float 152 ; ELF64: extsb 153@@ -48,6 +69,11 @@ entry: 154 ; ELF64: std 155 ; ELF64: lfd 156 ; ELF64: fcfids 157+; PPC970: extsb 158+; PPC970: std 159+; PPC970: lfd 160+; PPC970: fcfid 161+; PPC970: frsp 162 store float %conv, float* %b.addr, align 4 163 ret void 164 } 165@@ -55,11 +81,15 @@ entry: 166 define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { 167 entry: 168 ; ELF64: sitofp_double_i32 169+; PPC970: sitofp_double_i32 170 %b.addr = alloca double, align 8 171 %conv = sitofp i32 %a to double 172 ; ELF64: std 173 ; ELF64: lfiwax 174 ; ELF64: fcfid 175+; PPC970: std 176+; PPC970: lfd 177+; PPC970: fcfid 178 store double %conv, double* %b.addr, align 8 179 ret void 180 } 181@@ -67,11 +97,15 @@ entry: 182 define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { 183 entry: 184 ; ELF64: sitofp_double_i64 185+; PPC970: sitofp_double_i64 186 %b.addr = alloca double, align 8 187 %conv = sitofp i64 %a to double 188 ; ELF64: std 189 ; ELF64: lfd 190 ; ELF64: fcfid 191+; PPC970: std 192+; PPC970: lfd 193+; PPC970: fcfid 194 store double %conv, double* %b.addr, align 8 195 ret void 196 } 197@@ -79,6 +113,7 @@ entry: 198 define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { 199 entry: 200 ; ELF64: sitofp_double_i16 201+; PPC970: sitofp_double_i16 202 %b.addr = alloca double, align 8 203 %conv = sitofp i16 %a to double 204 ; ELF64: extsh 205@@ -85,6 +120,10 @@ entry: 206 ; ELF64: std 207 ; ELF64: lfd 208 ; ELF64: fcfid 209+; PPC970: extsh 210+; PPC970: std 211+; PPC970: lfd 212+; PPC970: fcfid 213 store double %conv, double* %b.addr, align 8 214 ret void 215 } 216@@ -92,6 +131,7 @@ entry: 217 define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { 218 entry: 219 ; ELF64: sitofp_double_i8 220+; PPC970: sitofp_double_i8 221 %b.addr = alloca double, align 8 222 %conv = sitofp i8 %a to double 223 ; ELF64: extsb 224@@ -98,6 +138,10 @@ entry: 225 ; ELF64: std 226 ; ELF64: lfd 227 ; ELF64: fcfid 228+; PPC970: extsb 229+; PPC970: std 230+; PPC970: lfd 231+; PPC970: fcfid 232 store double %conv, double* %b.addr, align 8 233 ret void 234 } 235@@ -107,11 +151,13 @@ entry: 236 define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { 237 entry: 238 ; ELF64: uitofp_single_i64 239+; PPC970: uitofp_single_i64 240 %b.addr = alloca float, align 4 241 %conv = uitofp i64 %a to float 242 ; ELF64: std 243 ; ELF64: lfd 244 ; ELF64: fcfidus 245+; PPC970-NOT: fcfidus 246 store float %conv, float* %b.addr, align 4 247 ret void 248 } 249@@ -119,11 +165,14 @@ entry: 250 define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { 251 entry: 252 ; ELF64: uitofp_single_i32 253+; PPC970: uitofp_single_i32 254 %b.addr = alloca float, align 4 255 %conv = uitofp i32 %a to float 256 ; ELF64: std 257 ; ELF64: lfiwzx 258 ; ELF64: fcfidus 259+; PPC970-NOT: lfiwzx 260+; PPC970-NOT: fcfidus 261 store float %conv, float* %b.addr, align 4 262 ret void 263 } 264@@ -131,6 +180,7 @@ entry: 265 define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { 266 entry: 267 ; ELF64: uitofp_single_i16 268+; PPC970: uitofp_single_i16 269 %b.addr = alloca float, align 4 270 %conv = uitofp i16 %a to float 271 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 272@@ -137,6 +187,11 @@ entry: 273 ; ELF64: std 274 ; ELF64: lfd 275 ; ELF64: fcfidus 276+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 277+; PPC970: std 278+; PPC970: lfd 279+; PPC970: fcfid 280+; PPC970: frsp 281 store float %conv, float* %b.addr, align 4 282 ret void 283 } 284@@ -144,6 +199,7 @@ entry: 285 define void @uitofp_single_i8(i8 %a) nounwind ssp { 286 entry: 287 ; ELF64: uitofp_single_i8 288+; PPC970: uitofp_single_i8 289 %b.addr = alloca float, align 4 290 %conv = uitofp i8 %a to float 291 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 292@@ -150,6 +206,11 @@ entry: 293 ; ELF64: std 294 ; ELF64: lfd 295 ; ELF64: fcfidus 296+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 297+; PPC970: std 298+; PPC970: lfd 299+; PPC970: fcfid 300+; PPC970: frsp 301 store float %conv, float* %b.addr, align 4 302 ret void 303 } 304@@ -157,11 +218,13 @@ entry: 305 define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { 306 entry: 307 ; ELF64: uitofp_double_i64 308+; PPC970: uitofp_double_i64 309 %b.addr = alloca double, align 8 310 %conv = uitofp i64 %a to double 311 ; ELF64: std 312 ; ELF64: lfd 313 ; ELF64: fcfidu 314+; PPC970-NOT: fcfidu 315 store double %conv, double* %b.addr, align 8 316 ret void 317 } 318@@ -169,11 +232,14 @@ entry: 319 define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { 320 entry: 321 ; ELF64: uitofp_double_i32 322+; PPC970: uitofp_double_i32 323 %b.addr = alloca double, align 8 324 %conv = uitofp i32 %a to double 325 ; ELF64: std 326 ; ELF64: lfiwzx 327 ; ELF64: fcfidu 328+; PPC970-NOT: lfiwzx 329+; PPC970-NOT: fcfidu 330 store double %conv, double* %b.addr, align 8 331 ret void 332 } 333@@ -181,6 +247,7 @@ entry: 334 define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { 335 entry: 336 ; ELF64: uitofp_double_i16 337+; PPC970: uitofp_double_i16 338 %b.addr = alloca double, align 8 339 %conv = uitofp i16 %a to double 340 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 341@@ -187,6 +254,10 @@ entry: 342 ; ELF64: std 343 ; ELF64: lfd 344 ; ELF64: fcfidu 345+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 346+; PPC970: std 347+; PPC970: lfd 348+; PPC970: fcfid 349 store double %conv, double* %b.addr, align 8 350 ret void 351 } 352@@ -194,6 +265,7 @@ entry: 353 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { 354 entry: 355 ; ELF64: uitofp_double_i8 356+; PPC970: uitofp_double_i8 357 %b.addr = alloca double, align 8 358 %conv = uitofp i8 %a to double 359 ; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 360@@ -200,6 +272,10 @@ entry: 361 ; ELF64: std 362 ; ELF64: lfd 363 ; ELF64: fcfidu 364+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 365+; PPC970: std 366+; PPC970: lfd 367+; PPC970: fcfid 368 store double %conv, double* %b.addr, align 8 369 ret void 370 } 371@@ -209,11 +285,15 @@ entry: 372 define void @fptosi_float_i32(float %a) nounwind ssp { 373 entry: 374 ; ELF64: fptosi_float_i32 375+; PPC970: fptosi_float_i32 376 %b.addr = alloca i32, align 4 377 %conv = fptosi float %a to i32 378 ; ELF64: fctiwz 379 ; ELF64: stfd 380 ; ELF64: lwa 381+; PPC970: fctiwz 382+; PPC970: stfd 383+; PPC970: lwa 384 store i32 %conv, i32* %b.addr, align 4 385 ret void 386 } 387@@ -221,11 +301,15 @@ entry: 388 define void @fptosi_float_i64(float %a) nounwind ssp { 389 entry: 390 ; ELF64: fptosi_float_i64 391+; PPC970: fptosi_float_i64 392 %b.addr = alloca i64, align 4 393 %conv = fptosi float %a to i64 394 ; ELF64: fctidz 395 ; ELF64: stfd 396 ; ELF64: ld 397+; PPC970: fctidz 398+; PPC970: stfd 399+; PPC970: ld 400 store i64 %conv, i64* %b.addr, align 4 401 ret void 402 } 403@@ -233,11 +317,15 @@ entry: 404 define void @fptosi_double_i32(double %a) nounwind ssp { 405 entry: 406 ; ELF64: fptosi_double_i32 407+; PPC970: fptosi_double_i32 408 %b.addr = alloca i32, align 8 409 %conv = fptosi double %a to i32 410 ; ELF64: fctiwz 411 ; ELF64: stfd 412 ; ELF64: lwa 413+; PPC970: fctiwz 414+; PPC970: stfd 415+; PPC970: lwa 416 store i32 %conv, i32* %b.addr, align 8 417 ret void 418 } 419@@ -245,11 +333,15 @@ entry: 420 define void @fptosi_double_i64(double %a) nounwind ssp { 421 entry: 422 ; ELF64: fptosi_double_i64 423+; PPC970: fptosi_double_i64 424 %b.addr = alloca i64, align 8 425 %conv = fptosi double %a to i64 426 ; ELF64: fctidz 427 ; ELF64: stfd 428 ; ELF64: ld 429+; PPC970: fctidz 430+; PPC970: stfd 431+; PPC970: ld 432 store i64 %conv, i64* %b.addr, align 8 433 ret void 434 } 435@@ -259,11 +351,15 @@ entry: 436 define void @fptoui_float_i32(float %a) nounwind ssp { 437 entry: 438 ; ELF64: fptoui_float_i32 439+; PPC970: fptoui_float_i32 440 %b.addr = alloca i32, align 4 441 %conv = fptoui float %a to i32 442 ; ELF64: fctiwuz 443 ; ELF64: stfd 444 ; ELF64: lwz 445+; PPC970: fctidz 446+; PPC970: stfd 447+; PPC970: lwz 448 store i32 %conv, i32* %b.addr, align 4 449 ret void 450 } 451@@ -271,11 +367,13 @@ entry: 452 define void @fptoui_float_i64(float %a) nounwind ssp { 453 entry: 454 ; ELF64: fptoui_float_i64 455+; PPC970: fptoui_float_i64 456 %b.addr = alloca i64, align 4 457 %conv = fptoui float %a to i64 458 ; ELF64: fctiduz 459 ; ELF64: stfd 460 ; ELF64: ld 461+; PPC970-NOT: fctiduz 462 store i64 %conv, i64* %b.addr, align 4 463 ret void 464 } 465@@ -283,11 +381,15 @@ entry: 466 define void @fptoui_double_i32(double %a) nounwind ssp { 467 entry: 468 ; ELF64: fptoui_double_i32 469+; PPC970: fptoui_double_i32 470 %b.addr = alloca i32, align 8 471 %conv = fptoui double %a to i32 472 ; ELF64: fctiwuz 473 ; ELF64: stfd 474 ; ELF64: lwz 475+; PPC970: fctidz 476+; PPC970: stfd 477+; PPC970: lwz 478 store i32 %conv, i32* %b.addr, align 8 479 ret void 480 } 481@@ -295,11 +397,13 @@ entry: 482 define void @fptoui_double_i64(double %a) nounwind ssp { 483 entry: 484 ; ELF64: fptoui_double_i64 485+; PPC970: fptoui_double_i64 486 %b.addr = alloca i64, align 8 487 %conv = fptoui double %a to i64 488 ; ELF64: fctiduz 489 ; ELF64: stfd 490 ; ELF64: ld 491+; PPC970-NOT: fctiduz 492 store i64 %conv, i64* %b.addr, align 8 493 ret void 494 } 495