AArch64LegalizerInfo.cpp revision 360784
//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
    .clampScalar(0, s1, s64)
    .widenScalarToNextPow2(0, 8)
    .fewerElementsIf(
      [=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() &&
          (Query.Types[0].getElementType() != s64 ||
           Query.Types[0].getNumElements() != 2);
      },
      [=](const LegalityQuery &Query) {
        LLT EltTy = Query.Types[0].getElementType();
        if (EltTy == s64)
          return std::make_pair(0, LLT::vector(2, 64));
        return std::make_pair(0, EltTy);
      });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);
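  // For illustration (a rough sketch, not verified MIR output): the
  // clampScalar rule above widens an s8 G_ADD to s32 by any-extending the
  // sources and truncating the result, e.g.
  //   %2:_(s8) = G_ADD %0:_(s8), %1:_(s8)
  // becomes approximately
  //   %3:_(s32) = G_ANYEXT %0
  //   %4:_(s32) = G_ANYEXT %1
  //   %5:_(s32) = G_ADD %3, %4
  //   %2:_(s8) = G_TRUNC %5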

  getActionDefinitionsBuilder(G_SHL)
    .legalFor({{s32, s32}, {s64, s64},
               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
    .clampScalar(1, s32, s64)
    .clampScalar(0, s32, s64)
    .widenScalarToNextPow2(0)
    .clampNumElements(0, v2s32, v4s32)
    .clampNumElements(0, v2s64, v2s64)
    .moreElementsToNextPow2(0)
    .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});
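  // The remainder is expanded in terms of operations handled above; roughly,
  // x % y becomes x - (x / y) * y:
  //   %q = G_SDIV %x, %y
  //   %p = G_MUL %q, %y
  //   %r = G_SUB %x, %p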

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});
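  // A sketch of the generic lowering: the high half of the product is
  // computed with G_SMULH/G_UMULH and compared against what a non-overflowing
  // multiply would produce (the sign bits of the low half, or zero).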

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
    .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      //       how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
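  // lower() here rewrites any remaining extending load as a plain G_LOAD of
  // the memory type followed by the matching G_SEXT/G_ZEXT of the result.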

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any remaining any-extending loads into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);
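  // The IsPtrVecPred cases above go to legalizeLoadStore() below, which
  // bitcasts e.g. a <2 x p0> value to <2 x s64> so that the imported
  // SelectionDAG patterns for s64 element vectors can select the access.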

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
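  // The generic lowering of G_SEXT_INREG is a shift pair: the field is
  // shifted up to the top bit with G_SHL, then arithmetic-shifted back down
  // with G_ASHR.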

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
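  // scalarize(0) splits e.g. a v2s32 G_SELECT into two scalar G_SELECTs on
  // the unmerged elements, sharing the condition, and re-merges the results.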

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);
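  // The custom action is implemented by legalizeVaArg() below, which expands
  // G_VAARG into explicit loads and stores through the va_list pointer.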

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128,
              // whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1
                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }
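  // As a worked example of the widening rule above: an s100 big type goes to
  // the next power of 2, s128, while an s160 big type is widened to s192
  // (alignTo<64>(161) == 192, which beats the next power of 2, 256).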

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);
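  // The legalIf above accepts e.g. building a v4s16 from s32 sources: each
  // source is implicitly truncated to the 16-bit element type.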

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
      // just want those lowered into G_BUILD_VECTOR.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
    .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // A custom legalization must leave the instruction fully legalized if it is
  // not deleted. To allow further legalization of this instruction, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(
        dbgs() << "Tried to do custom legalization on wrong load/store\n");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

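// Expand G_VAARG into explicit va_list manipulation: load the current
// pointer, realign it if the type needs more than pointer alignment, load
// the value, then store back the incremented pointer. This assumes the
// simple pointer-style va_list (as used on e.g. Darwin AArch64).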
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}