AArch64LegalizerInfo.cpp revision 360784
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// \file 9/// This file implements the targeting of the Machinelegalizer class for 10/// AArch64. 11/// \todo This should be generated by TableGen. 12//===----------------------------------------------------------------------===// 13 14#include "AArch64LegalizerInfo.h" 15#include "AArch64Subtarget.h" 16#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 17#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 18#include "llvm/CodeGen/GlobalISel/Utils.h" 19#include "llvm/CodeGen/MachineInstr.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/TargetOpcodes.h" 22#include "llvm/CodeGen/ValueTypes.h" 23#include "llvm/IR/DerivedTypes.h" 24#include "llvm/IR/Type.h" 25 26#define DEBUG_TYPE "aarch64-legalinfo" 27 28using namespace llvm; 29using namespace LegalizeActions; 30using namespace LegalizeMutations; 31using namespace LegalityPredicates; 32 33AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { 34 using namespace TargetOpcode; 35 const LLT p0 = LLT::pointer(0, 64); 36 const LLT s1 = LLT::scalar(1); 37 const LLT s8 = LLT::scalar(8); 38 const LLT s16 = LLT::scalar(16); 39 const LLT s32 = LLT::scalar(32); 40 const LLT s64 = LLT::scalar(64); 41 const LLT s128 = LLT::scalar(128); 42 const LLT s256 = LLT::scalar(256); 43 const LLT s512 = LLT::scalar(512); 44 const LLT v16s8 = LLT::vector(16, 8); 45 const LLT v8s8 = LLT::vector(8, 8); 46 const LLT v4s8 = LLT::vector(4, 8); 47 const LLT v8s16 = LLT::vector(8, 16); 48 const LLT v4s16 = LLT::vector(4, 16); 49 const LLT v2s16 = LLT::vector(2, 16); 50 const LLT v2s32 = LLT::vector(2, 32); 51 const LLT v4s32 = LLT::vector(4, 32); 52 const LLT v2s64 = LLT::vector(2, 64); 53 const LLT v2p0 = LLT::vector(2, p0); 54 55 // FIXME: support subtargets which have neon/fp-armv8 disabled. 56 if (!ST.hasNEON() || !ST.hasFPARMv8()) { 57 computeTables(); 58 return; 59 } 60 61 getActionDefinitionsBuilder(G_IMPLICIT_DEF) 62 .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64}) 63 .clampScalar(0, s1, s64) 64 .widenScalarToNextPow2(0, 8) 65 .fewerElementsIf( 66 [=](const LegalityQuery &Query) { 67 return Query.Types[0].isVector() && 68 (Query.Types[0].getElementType() != s64 || 69 Query.Types[0].getNumElements() != 2); 70 }, 71 [=](const LegalityQuery &Query) { 72 LLT EltTy = Query.Types[0].getElementType(); 73 if (EltTy == s64) 74 return std::make_pair(0, LLT::vector(2, 64)); 75 return std::make_pair(0, EltTy); 76 }); 77 78 getActionDefinitionsBuilder(G_PHI) 79 .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64}) 80 .clampScalar(0, s16, s64) 81 .widenScalarToNextPow2(0); 82 83 getActionDefinitionsBuilder(G_BSWAP) 84 .legalFor({s32, s64, v4s32, v2s32, v2s64}) 85 .clampScalar(0, s32, s64) 86 .widenScalarToNextPow2(0); 87 88 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) 89 .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8}) 90 .clampScalar(0, s32, s64) 91 .widenScalarToNextPow2(0) 92 .clampNumElements(0, v2s32, v4s32) 93 .clampNumElements(0, v2s64, v2s64) 94 .moreElementsToNextPow2(0); 95 96 getActionDefinitionsBuilder(G_SHL) 97 .legalFor({{s32, s32}, {s64, s64}, 98 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}}) 99 .clampScalar(1, s32, s64) 100 .clampScalar(0, s32, s64) 101 .widenScalarToNextPow2(0) 102 .clampNumElements(0, v2s32, v4s32) 103 .clampNumElements(0, v2s64, v2s64) 104 .moreElementsToNextPow2(0) 105 .minScalarSameAs(1, 0); 106 107 getActionDefinitionsBuilder(G_PTR_ADD) 108 .legalFor({{p0, s64}}) 109 .clampScalar(1, s64, s64); 110 111 getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0}); 112 113 getActionDefinitionsBuilder({G_SDIV, G_UDIV}) 114 .legalFor({s32, s64}) 115 .libcallFor({s128}) 116 .clampScalar(0, s32, s64) 117 .widenScalarToNextPow2(0) 118 .scalarize(0); 119 120 getActionDefinitionsBuilder({G_LSHR, G_ASHR}) 121 .customIf([=](const LegalityQuery &Query) { 122 const auto &SrcTy = Query.Types[0]; 123 const auto &AmtTy = Query.Types[1]; 124 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && 125 AmtTy.getSizeInBits() == 32; 126 }) 127 .legalFor({{s32, s32}, 128 {s32, s64}, 129 {s64, s64}, 130 {v2s32, v2s32}, 131 {v4s32, v4s32}, 132 {v2s64, v2s64}}) 133 .clampScalar(1, s32, s64) 134 .clampScalar(0, s32, s64) 135 .minScalarSameAs(1, 0); 136 137 getActionDefinitionsBuilder({G_SREM, G_UREM}) 138 .lowerFor({s1, s8, s16, s32, s64}); 139 140 getActionDefinitionsBuilder({G_SMULO, G_UMULO}) 141 .lowerFor({{s64, s1}}); 142 143 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); 144 145 getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO}) 146 .legalFor({{s32, s1}, {s64, s1}}) 147 .minScalar(0, s32); 148 149 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG}) 150 .legalFor({s32, s64, v2s64, v4s32, v2s32}); 151 152 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); 153 154 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, 155 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, 156 G_FNEARBYINT}) 157 // If we don't have full FP16 support, then scalarize the elements of 158 // vectors containing fp16 types. 159 .fewerElementsIf( 160 [=, &ST](const LegalityQuery &Query) { 161 const auto &Ty = Query.Types[0]; 162 return Ty.isVector() && Ty.getElementType() == s16 && 163 !ST.hasFullFP16(); 164 }, 165 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); }) 166 // If we don't have full FP16 support, then widen s16 to s32 if we 167 // encounter it. 168 .widenScalarIf( 169 [=, &ST](const LegalityQuery &Query) { 170 return Query.Types[0] == s16 && !ST.hasFullFP16(); 171 }, 172 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); }) 173 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16}); 174 175 getActionDefinitionsBuilder( 176 {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW}) 177 // We need a call for these, so we always need to scalarize. 178 .scalarize(0) 179 // Regardless of FP16 support, widen 16-bit elements to 32-bits. 180 .minScalar(0, s32) 181 .libcallFor({s32, s64, v2s32, v4s32, v2s64}); 182 183 getActionDefinitionsBuilder(G_INSERT) 184 .unsupportedIf([=](const LegalityQuery &Query) { 185 return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits(); 186 }) 187 .legalIf([=](const LegalityQuery &Query) { 188 const LLT &Ty0 = Query.Types[0]; 189 const LLT &Ty1 = Query.Types[1]; 190 if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0) 191 return false; 192 return isPowerOf2_32(Ty1.getSizeInBits()) && 193 (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8); 194 }) 195 .clampScalar(0, s32, s64) 196 .widenScalarToNextPow2(0) 197 .maxScalarIf(typeInSet(0, {s32}), 1, s16) 198 .maxScalarIf(typeInSet(0, {s64}), 1, s32) 199 .widenScalarToNextPow2(1); 200 201 getActionDefinitionsBuilder(G_EXTRACT) 202 .unsupportedIf([=](const LegalityQuery &Query) { 203 return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits(); 204 }) 205 .legalIf([=](const LegalityQuery &Query) { 206 const LLT &Ty0 = Query.Types[0]; 207 const LLT &Ty1 = Query.Types[1]; 208 if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128) 209 return false; 210 if (Ty1 == p0) 211 return true; 212 return isPowerOf2_32(Ty0.getSizeInBits()) && 213 (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); 214 }) 215 .clampScalar(1, s32, s128) 216 .widenScalarToNextPow2(1) 217 .maxScalarIf(typeInSet(1, {s32}), 0, s16) 218 .maxScalarIf(typeInSet(1, {s64}), 0, s32) 219 .widenScalarToNextPow2(0); 220 221 getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) 222 .legalForTypesWithMemDesc({{s32, p0, 8, 8}, 223 {s32, p0, 16, 8}, 224 {s32, p0, 32, 8}, 225 {s64, p0, 8, 2}, 226 {s64, p0, 16, 2}, 227 {s64, p0, 32, 4}, 228 {s64, p0, 64, 8}, 229 {p0, p0, 64, 8}, 230 {v2s32, p0, 64, 8}}) 231 .clampScalar(0, s32, s64) 232 .widenScalarToNextPow2(0) 233 // TODO: We could support sum-of-pow2's but the lowering code doesn't know 234 // how to do that yet. 235 .unsupportedIfMemSizeNotPow2() 236 // Lower anything left over into G_*EXT and G_LOAD 237 .lower(); 238 239 auto IsPtrVecPred = [=](const LegalityQuery &Query) { 240 const LLT &ValTy = Query.Types[0]; 241 if (!ValTy.isVector()) 242 return false; 243 const LLT EltTy = ValTy.getElementType(); 244 return EltTy.isPointer() && EltTy.getAddressSpace() == 0; 245 }; 246 247 getActionDefinitionsBuilder(G_LOAD) 248 .legalForTypesWithMemDesc({{s8, p0, 8, 8}, 249 {s16, p0, 16, 8}, 250 {s32, p0, 32, 8}, 251 {s64, p0, 64, 8}, 252 {p0, p0, 64, 8}, 253 {s128, p0, 128, 8}, 254 {v8s8, p0, 64, 8}, 255 {v16s8, p0, 128, 8}, 256 {v4s16, p0, 64, 8}, 257 {v8s16, p0, 128, 8}, 258 {v2s32, p0, 64, 8}, 259 {v4s32, p0, 128, 8}, 260 {v2s64, p0, 128, 8}}) 261 // These extends are also legal 262 .legalForTypesWithMemDesc({{s32, p0, 8, 8}, 263 {s32, p0, 16, 8}}) 264 .clampScalar(0, s8, s64) 265 .lowerIfMemSizeNotPow2() 266 // Lower any any-extending loads left into G_ANYEXT and G_LOAD 267 .lowerIf([=](const LegalityQuery &Query) { 268 return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; 269 }) 270 .widenScalarToNextPow2(0) 271 .clampMaxNumElements(0, s32, 2) 272 .clampMaxNumElements(0, s64, 1) 273 .customIf(IsPtrVecPred); 274 275 getActionDefinitionsBuilder(G_STORE) 276 .legalForTypesWithMemDesc({{s8, p0, 8, 8}, 277 {s16, p0, 16, 8}, 278 {s32, p0, 8, 8}, 279 {s32, p0, 16, 8}, 280 {s32, p0, 32, 8}, 281 {s64, p0, 64, 8}, 282 {p0, p0, 64, 8}, 283 {s128, p0, 128, 8}, 284 {v16s8, p0, 128, 8}, 285 {v4s16, p0, 64, 8}, 286 {v8s16, p0, 128, 8}, 287 {v2s32, p0, 64, 8}, 288 {v4s32, p0, 128, 8}, 289 {v2s64, p0, 128, 8}}) 290 .clampScalar(0, s8, s64) 291 .lowerIfMemSizeNotPow2() 292 .lowerIf([=](const LegalityQuery &Query) { 293 return Query.Types[0].isScalar() && 294 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; 295 }) 296 .clampMaxNumElements(0, s32, 2) 297 .clampMaxNumElements(0, s64, 1) 298 .customIf(IsPtrVecPred); 299 300 // Constants 301 getActionDefinitionsBuilder(G_CONSTANT) 302 .legalFor({p0, s8, s16, s32, s64}) 303 .clampScalar(0, s8, s64) 304 .widenScalarToNextPow2(0); 305 getActionDefinitionsBuilder(G_FCONSTANT) 306 .legalFor({s32, s64}) 307 .clampScalar(0, s32, s64); 308 309 getActionDefinitionsBuilder(G_ICMP) 310 .legalFor({{s32, s32}, 311 {s32, s64}, 312 {s32, p0}, 313 {v4s32, v4s32}, 314 {v2s32, v2s32}, 315 {v2s64, v2s64}, 316 {v2s64, v2p0}, 317 {v4s16, v4s16}, 318 {v8s16, v8s16}, 319 {v8s8, v8s8}, 320 {v16s8, v16s8}}) 321 .clampScalar(1, s32, s64) 322 .clampScalar(0, s32, s32) 323 .minScalarEltSameAsIf( 324 [=](const LegalityQuery &Query) { 325 const LLT &Ty = Query.Types[0]; 326 const LLT &SrcTy = Query.Types[1]; 327 return Ty.isVector() && !SrcTy.getElementType().isPointer() && 328 Ty.getElementType() != SrcTy.getElementType(); 329 }, 330 0, 1) 331 .minScalarOrEltIf( 332 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; }, 333 1, s32) 334 .minScalarOrEltIf( 335 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, 336 s64) 337 .widenScalarOrEltToNextPow2(1); 338 339 getActionDefinitionsBuilder(G_FCMP) 340 .legalFor({{s32, s32}, {s32, s64}}) 341 .clampScalar(0, s32, s32) 342 .clampScalar(1, s32, s64) 343 .widenScalarToNextPow2(1); 344 345 // Extensions 346 auto ExtLegalFunc = [=](const LegalityQuery &Query) { 347 unsigned DstSize = Query.Types[0].getSizeInBits(); 348 349 if (DstSize == 128 && !Query.Types[0].isVector()) 350 return false; // Extending to a scalar s128 needs narrowing. 351 352 // Make sure that we have something that will fit in a register, and 353 // make sure it's a power of 2. 354 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) 355 return false; 356 357 const LLT &SrcTy = Query.Types[1]; 358 359 // Special case for s1. 360 if (SrcTy == s1) 361 return true; 362 363 // Make sure we fit in a register otherwise. Don't bother checking that 364 // the source type is below 128 bits. We shouldn't be allowing anything 365 // through which is wider than the destination in the first place. 366 unsigned SrcSize = SrcTy.getSizeInBits(); 367 if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) 368 return false; 369 370 return true; 371 }; 372 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) 373 .legalIf(ExtLegalFunc) 374 .clampScalar(0, s64, s64); // Just for s128, others are handled above. 375 376 getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); 377 378 getActionDefinitionsBuilder(G_SEXT_INREG).lower(); 379 380 // FP conversions 381 getActionDefinitionsBuilder(G_FPTRUNC).legalFor( 382 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}); 383 getActionDefinitionsBuilder(G_FPEXT).legalFor( 384 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}); 385 386 // Conversions 387 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) 388 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) 389 .clampScalar(0, s32, s64) 390 .widenScalarToNextPow2(0) 391 .clampScalar(1, s32, s64) 392 .widenScalarToNextPow2(1); 393 394 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) 395 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) 396 .clampScalar(1, s32, s64) 397 .widenScalarToNextPow2(1) 398 .clampScalar(0, s32, s64) 399 .widenScalarToNextPow2(0); 400 401 // Control-flow 402 getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32}); 403 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); 404 405 // Select 406 // FIXME: We can probably do a bit better than just scalarizing vector 407 // selects. 408 getActionDefinitionsBuilder(G_SELECT) 409 .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) 410 .clampScalar(0, s32, s64) 411 .widenScalarToNextPow2(0) 412 .scalarize(0); 413 414 // Pointer-handling 415 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); 416 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); 417 418 getActionDefinitionsBuilder(G_PTRTOINT) 419 .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0}) 420 .maxScalar(0, s64) 421 .widenScalarToNextPow2(0, /*Min*/ 8); 422 423 getActionDefinitionsBuilder(G_INTTOPTR) 424 .unsupportedIf([&](const LegalityQuery &Query) { 425 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); 426 }) 427 .legalFor({{p0, s64}}); 428 429 // Casts for 32 and 64-bit width type are just copies. 430 // Same for 128-bit width type, except they are on the FPR bank. 431 getActionDefinitionsBuilder(G_BITCAST) 432 // FIXME: This is wrong since G_BITCAST is not allowed to change the 433 // number of bits but it's what the previous code described and fixing 434 // it breaks tests. 435 .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8, 436 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64, 437 v2p0}); 438 439 getActionDefinitionsBuilder(G_VASTART).legalFor({p0}); 440 441 // va_list must be a pointer, but most sized types are pretty easy to handle 442 // as the destination. 443 getActionDefinitionsBuilder(G_VAARG) 444 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0}) 445 .clampScalar(0, s8, s64) 446 .widenScalarToNextPow2(0, /*Min*/ 8); 447 448 if (ST.hasLSE()) { 449 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) 450 .lowerIf(all( 451 typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0), 452 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); 453 454 getActionDefinitionsBuilder( 455 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, 456 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, 457 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG}) 458 .legalIf(all( 459 typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0), 460 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); 461 } 462 463 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); 464 465 // Merge/Unmerge 466 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { 467 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; 468 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; 469 470 auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) { 471 const LLT &Ty = Query.Types[TypeIdx]; 472 if (Ty.isVector()) { 473 const LLT &EltTy = Ty.getElementType(); 474 if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64) 475 return true; 476 if (!isPowerOf2_32(EltTy.getSizeInBits())) 477 return true; 478 } 479 return false; 480 }; 481 482 // FIXME: This rule is horrible, but specifies the same as what we had 483 // before with the particularly strange definitions removed (e.g. 484 // s8 = G_MERGE_VALUES s32, s32). 485 // Part of the complexity comes from these ops being extremely flexible. For 486 // example, you can build/decompose vectors with it, concatenate vectors, 487 // etc. and in addition to this you can also bitcast with it at the same 488 // time. We've been considering breaking it up into multiple ops to make it 489 // more manageable throughout the backend. 490 getActionDefinitionsBuilder(Op) 491 // Break up vectors with weird elements into scalars 492 .fewerElementsIf( 493 [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, 494 scalarize(0)) 495 .fewerElementsIf( 496 [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, 497 scalarize(1)) 498 // Clamp the big scalar to s8-s512 and make it either a power of 2, 192, 499 // or 384. 500 .clampScalar(BigTyIdx, s8, s512) 501 .widenScalarIf( 502 [=](const LegalityQuery &Query) { 503 const LLT &Ty = Query.Types[BigTyIdx]; 504 return !isPowerOf2_32(Ty.getSizeInBits()) && 505 Ty.getSizeInBits() % 64 != 0; 506 }, 507 [=](const LegalityQuery &Query) { 508 // Pick the next power of 2, or a multiple of 64 over 128. 509 // Whichever is smaller. 510 const LLT &Ty = Query.Types[BigTyIdx]; 511 unsigned NewSizeInBits = 1 512 << Log2_32_Ceil(Ty.getSizeInBits() + 1); 513 if (NewSizeInBits >= 256) { 514 unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); 515 if (RoundedTo < NewSizeInBits) 516 NewSizeInBits = RoundedTo; 517 } 518 return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); 519 }) 520 // Clamp the little scalar to s8-s256 and make it a power of 2. It's not 521 // worth considering the multiples of 64 since 2*192 and 2*384 are not 522 // valid. 523 .clampScalar(LitTyIdx, s8, s256) 524 .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8) 525 // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384, 526 // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>. 527 // At this point it's simple enough to accept the legal types. 528 .legalIf([=](const LegalityQuery &Query) { 529 const LLT &BigTy = Query.Types[BigTyIdx]; 530 const LLT &LitTy = Query.Types[LitTyIdx]; 531 if (BigTy.isVector() && BigTy.getSizeInBits() < 32) 532 return false; 533 if (LitTy.isVector() && LitTy.getSizeInBits() < 32) 534 return false; 535 return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; 536 }) 537 // Any vectors left are the wrong size. Scalarize them. 538 .scalarize(0) 539 .scalarize(1); 540 } 541 542 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) 543 .unsupportedIf([=](const LegalityQuery &Query) { 544 const LLT &EltTy = Query.Types[1].getElementType(); 545 return Query.Types[0] != EltTy; 546 }) 547 .minScalar(2, s64) 548 .legalIf([=](const LegalityQuery &Query) { 549 const LLT &VecTy = Query.Types[1]; 550 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || 551 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32; 552 }); 553 554 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) 555 .legalIf([=](const LegalityQuery &Query) { 556 const LLT &VecTy = Query.Types[0]; 557 // TODO: Support s8 and s16 558 return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64; 559 }); 560 561 getActionDefinitionsBuilder(G_BUILD_VECTOR) 562 .legalFor({{v4s16, s16}, 563 {v8s16, s16}, 564 {v2s32, s32}, 565 {v4s32, s32}, 566 {v2p0, p0}, 567 {v2s64, s64}}) 568 .clampNumElements(0, v4s32, v4s32) 569 .clampNumElements(0, v2s64, v2s64) 570 571 // Deal with larger scalar types, which will be implicitly truncated. 572 .legalIf([=](const LegalityQuery &Query) { 573 return Query.Types[0].getScalarSizeInBits() < 574 Query.Types[1].getSizeInBits(); 575 }) 576 .minScalarSameAs(1, 0); 577 578 getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct( 579 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) 580 .scalarize(1); 581 582 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) 583 .legalIf([=](const LegalityQuery &Query) { 584 const LLT &DstTy = Query.Types[0]; 585 const LLT &SrcTy = Query.Types[1]; 586 // For now just support the TBL2 variant which needs the source vectors 587 // to be the same size as the dest. 588 if (DstTy != SrcTy) 589 return false; 590 for (auto &Ty : {v2s32, v4s32, v2s64}) { 591 if (DstTy == Ty) 592 return true; 593 } 594 return false; 595 }) 596 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we 597 // just want those lowered into G_BUILD_VECTOR 598 .lowerIf([=](const LegalityQuery &Query) { 599 return !Query.Types[1].isVector(); 600 }) 601 .clampNumElements(0, v4s32, v4s32) 602 .clampNumElements(0, v2s64, v2s64); 603 604 getActionDefinitionsBuilder(G_CONCAT_VECTORS) 605 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); 606 607 getActionDefinitionsBuilder(G_JUMP_TABLE) 608 .legalFor({{p0}, {s64}}); 609 610 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { 611 return Query.Types[0] == p0 && Query.Types[1] == s64; 612 }); 613 614 getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); 615 616 computeTables(); 617 verify(*ST.getInstrInfo()); 618} 619 620bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI, 621 MachineRegisterInfo &MRI, 622 MachineIRBuilder &MIRBuilder, 623 GISelChangeObserver &Observer) const { 624 switch (MI.getOpcode()) { 625 default: 626 // No idea what to do. 627 return false; 628 case TargetOpcode::G_VAARG: 629 return legalizeVaArg(MI, MRI, MIRBuilder); 630 case TargetOpcode::G_LOAD: 631 case TargetOpcode::G_STORE: 632 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer); 633 case TargetOpcode::G_SHL: 634 case TargetOpcode::G_ASHR: 635 case TargetOpcode::G_LSHR: 636 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); 637 } 638 639 llvm_unreachable("expected switch to return"); 640} 641 642bool AArch64LegalizerInfo::legalizeIntrinsic( 643 MachineInstr &MI, MachineRegisterInfo &MRI, 644 MachineIRBuilder &MIRBuilder) const { 645 switch (MI.getIntrinsicID()) { 646 case Intrinsic::memcpy: 647 case Intrinsic::memset: 648 case Intrinsic::memmove: 649 if (createMemLibcall(MIRBuilder, MRI, MI) == 650 LegalizerHelper::UnableToLegalize) 651 return false; 652 MI.eraseFromParent(); 653 return true; 654 default: 655 break; 656 } 657 return true; 658} 659 660bool AArch64LegalizerInfo::legalizeShlAshrLshr( 661 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, 662 GISelChangeObserver &Observer) const { 663 assert(MI.getOpcode() == TargetOpcode::G_ASHR || 664 MI.getOpcode() == TargetOpcode::G_LSHR || 665 MI.getOpcode() == TargetOpcode::G_SHL); 666 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the 667 // imported patterns can select it later. Either way, it will be legal. 668 Register AmtReg = MI.getOperand(2).getReg(); 669 auto *CstMI = MRI.getVRegDef(AmtReg); 670 assert(CstMI && "expected to find a vreg def"); 671 if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT) 672 return true; 673 // Check the shift amount is in range for an immediate form. 674 unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue(); 675 if (Amount > 31) 676 return true; // This will have to remain a register variant. 677 assert(MRI.getType(AmtReg).getSizeInBits() == 32); 678 MIRBuilder.setInstr(MI); 679 auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); 680 MI.getOperand(2).setReg(ExtCst.getReg(0)); 681 return true; 682} 683 684bool AArch64LegalizerInfo::legalizeLoadStore( 685 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, 686 GISelChangeObserver &Observer) const { 687 assert(MI.getOpcode() == TargetOpcode::G_STORE || 688 MI.getOpcode() == TargetOpcode::G_LOAD); 689 // Here we just try to handle vector loads/stores where our value type might 690 // have pointer elements, which the SelectionDAG importer can't handle. To 691 // allow the existing patterns for s64 to fire for p0, we just try to bitcast 692 // the value to use s64 types. 693 694 // Custom legalization requires the instruction, if not deleted, must be fully 695 // legalized. In order to allow further legalization of the inst, we create 696 // a new instruction and erase the existing one. 697 698 Register ValReg = MI.getOperand(0).getReg(); 699 const LLT ValTy = MRI.getType(ValReg); 700 701 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() || 702 ValTy.getElementType().getAddressSpace() != 0) { 703 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store"); 704 return false; 705 } 706 707 MIRBuilder.setInstr(MI); 708 unsigned PtrSize = ValTy.getElementType().getSizeInBits(); 709 const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); 710 auto &MMO = **MI.memoperands_begin(); 711 if (MI.getOpcode() == TargetOpcode::G_STORE) { 712 auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg}); 713 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO); 714 } else { 715 Register NewReg = MRI.createGenericVirtualRegister(NewTy); 716 auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO); 717 MIRBuilder.buildBitcast({ValReg}, {NewLoad}); 718 } 719 MI.eraseFromParent(); 720 return true; 721} 722 723bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, 724 MachineRegisterInfo &MRI, 725 MachineIRBuilder &MIRBuilder) const { 726 MIRBuilder.setInstr(MI); 727 MachineFunction &MF = MIRBuilder.getMF(); 728 unsigned Align = MI.getOperand(2).getImm(); 729 Register Dst = MI.getOperand(0).getReg(); 730 Register ListPtr = MI.getOperand(1).getReg(); 731 732 LLT PtrTy = MRI.getType(ListPtr); 733 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); 734 735 const unsigned PtrSize = PtrTy.getSizeInBits() / 8; 736 Register List = MRI.createGenericVirtualRegister(PtrTy); 737 MIRBuilder.buildLoad( 738 List, ListPtr, 739 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 740 PtrSize, /* Align = */ PtrSize)); 741 742 Register DstPtr; 743 if (Align > PtrSize) { 744 // Realign the list to the actual required alignment. 745 auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); 746 747 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0)); 748 749 DstPtr = MRI.createGenericVirtualRegister(PtrTy); 750 MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); 751 } else 752 DstPtr = List; 753 754 uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8; 755 MIRBuilder.buildLoad( 756 Dst, DstPtr, 757 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 758 ValSize, std::max(Align, PtrSize))); 759 760 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize)); 761 762 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0)); 763 764 MIRBuilder.buildStore( 765 NewList, ListPtr, 766 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 767 PtrSize, /* Align = */ PtrSize)); 768 769 MI.eraseFromParent(); 770 return true; 771} 772