1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/VectorUtils.h"
16#include "llvm/CodeGen/CallingConvLower.h"
17#include "llvm/CodeGen/CodeGenCommonISel.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineJumpTableInfo.h"
21#include "llvm/CodeGen/MachineModuleInfoImpls.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/TargetRegisterInfo.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/GlobalVariable.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/Support/DivisionByConstantInfo.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/KnownBits.h"
34#include "llvm/Support/MathExtras.h"
35#include "llvm/Target/TargetMachine.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
// All construction is delegated to TargetLoweringBase; this class adds no
// members of its own to initialize.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}
42
// Return a printable name for a target-specific ISD opcode. The base
// implementation knows no target-specific nodes, so it returns nullptr;
// targets override this for their own opcodes.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46
// Whether we are generating position-independent code, as configured on the
// owning TargetMachine.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
53bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54                                          SDValue &Chain) const {
55  const Function &F = DAG.getMachineFunction().getFunction();
56
57  // First, check if tail calls have been disabled in this function.
58  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59    return false;
60
61  // Conservatively require the attributes of the call to match those of
62  // the return. Ignore following attributes because they don't affect the
63  // call sequence.
64  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
67                           Attribute::NonNull, Attribute::NoUndef})
68    CallerAttrs.removeAttribute(Attr);
69
70  if (CallerAttrs.hasAttributes())
71    return false;
72
73  // It's not safe to eliminate the sign / zero extension of the return value.
74  if (CallerAttrs.contains(Attribute::ZExt) ||
75      CallerAttrs.contains(Attribute::SExt))
76    return false;
77
78  // Check if the only use is a function return node.
79  return isUsedByReturnOnly(Node, Chain);
80}
81
82bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
83    const uint32_t *CallerPreservedMask,
84    const SmallVectorImpl<CCValAssign> &ArgLocs,
85    const SmallVectorImpl<SDValue> &OutVals) const {
86  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87    const CCValAssign &ArgLoc = ArgLocs[I];
88    if (!ArgLoc.isRegLoc())
89      continue;
90    MCRegister Reg = ArgLoc.getLocReg();
91    // Only look at callee saved registers.
92    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93      continue;
94    // Check that we pass the value used for the caller.
95    // (We look for a CopyFromReg reading a virtual register that is used
96    //  for the function live-in value of register Reg)
97    SDValue Value = OutVals[I];
98    if (Value->getOpcode() == ISD::AssertZext)
99      Value = Value.getOperand(0);
100    if (Value->getOpcode() != ISD::CopyFromReg)
101      return false;
102    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104      return false;
105  }
106  return true;
107}
108
109/// Set CallLoweringInfo attribute flags based on a call instruction
110/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of argument ArgIdx into the
  // corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  // byval, preallocated, inalloca and sret all describe how a pointee is
  // passed, so at most one of them may be present on an argument.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    // An explicit stack alignment takes precedence; otherwise fall back to
    // the parameter alignment.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
141
142/// Generate a libcall taking the given operands as arguments and returning a
143/// result of type RetVT.
144std::pair<SDValue, SDValue>
145TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
146                            ArrayRef<SDValue> Ops,
147                            MakeLibCallOptions CallOptions,
148                            const SDLoc &dl,
149                            SDValue InChain) const {
150  if (!InChain)
151    InChain = DAG.getEntryNode();
152
153  TargetLowering::ArgListTy Args;
154  Args.reserve(Ops.size());
155
156  TargetLowering::ArgListEntry Entry;
157  for (unsigned i = 0; i < Ops.size(); ++i) {
158    SDValue NewOp = Ops[i];
159    Entry.Node = NewOp;
160    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162                                                 CallOptions.IsSExt);
163    Entry.IsZExt = !Entry.IsSExt;
164
165    if (CallOptions.IsSoften &&
166        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
167      Entry.IsSExt = Entry.IsZExt = false;
168    }
169    Args.push_back(Entry);
170  }
171
172  if (LC == RTLIB::UNKNOWN_LIBCALL)
173    report_fatal_error("Unsupported library call operation!");
174  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
175                                         getPointerTy(DAG.getDataLayout()));
176
177  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
178  TargetLowering::CallLoweringInfo CLI(DAG);
179  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180  bool zeroExtend = !signExtend;
181
182  if (CallOptions.IsSoften &&
183      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
184    signExtend = zeroExtend = false;
185  }
186
187  CLI.setDebugLoc(dl)
188      .setChain(InChain)
189      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
190      .setNoReturn(CallOptions.DoesNotReturn)
191      .setDiscardResult(!CallOptions.IsReturnValueUsed)
192      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
193      .setSExtResult(signExtend)
194      .setZExtResult(zeroExtend);
195  return LowerCallTo(CLI);
196}
197
/// Choose a sequence of value types (appended to MemOps) with which to lower
/// a memory operation of Op.size() bytes, using at most Limit individual
/// load/store ops. Returns false if more than Limit ops would be required,
/// or if the op-count limit is in effect and the copy is unattractive to
/// expand inline (fixed-dst-align memcpy with a less-aligned source).
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick a preferred type first; MVT::Other means "no
  // preference", in which case we derive one below.
  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Greedily cover the remaining bytes with VT-sized ops, shrinking VT as
  // needed once it no longer fits the tail.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the integer types until one is memop-safe; i8 is the
        // unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
// Convenience overload for callers that do not track a chain: forwards to
// the chained variant with a null Chain, whose updated value is discarded.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
299
/// Soften the operands of an FP comparison (f32/f64/f128/ppcf128) into one
/// or two integer comparison libcalls. On return, NewLHS/NewRHS and CCCode
/// describe the equivalent integer comparison and Chain is updated with the
/// libcall chain(s). Note: IsSignaling is currently unused in this body —
/// signaling semantics are limited by the available libcalls (see FIXME).
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  // LC1 is always emitted; LC2 is the second call for predicates that need
  // two libcalls (SETONE, SETUEQ).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO = !SETUO: emit the 'uo' call and invert the result.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ; two libcalls are combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  // The libcall result is compared against zero with the CC associated with
  // the libcall (possibly inverted, see above).
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call case (SETONE / SETUEQ): materialize the first comparison as a
    // setcc, emit the second libcall, and combine the two results with
    // AND (SETONE) or OR (SETUEQ).
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    // Join the chains of both calls when the caller tracks a chain.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    // The combined result is already a boolean; no RHS comparison remains.
    NewRHS = SDValue();
  }
}
439
440/// Return the entry encoding for a jump table in the current function. The
441/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
442unsigned TargetLowering::getJumpTableEncoding() const {
443  // In non-pic modes, just use the address of a block.
444  if (!isPositionIndependent())
445    return MachineJumpTableInfo::EK_BlockAddress;
446
447  // In PIC mode, if the target supports a GPRel32 directive, use it.
448  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
449    return MachineJumpTableInfo::EK_GPRel32BlockAddress;
450
451  // Otherwise, use a label difference.
452  return MachineJumpTableInfo::EK_LabelDifference32;
453}
454
455SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
456                                                 SelectionDAG &DAG) const {
457  // If our PIC model is GP relative, use the global offset table as the base.
458  unsigned JTEncoding = getJumpTableEncoding();
459
460  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
461      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
462    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
463
464  return Table;
465}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table:
  // build an MCSymbolRefExpr to the jump-table symbol for table JTI in MF.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
475
// Expand an indirect jump-table branch: emit an ISD::BRIND to Addr, first
// threading jump-table debug info onto the chain for COFF targets.
SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}
486
487bool
488TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
489  const TargetMachine &TM = getTargetMachine();
490  const GlobalValue *GV = GA->getGlobal();
491
492  // If the address is not even local to this DSO we will have to load it from
493  // a got and then add the offset.
494  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
495    return false;
496
497  // If the code is position independent we will have to add a base register.
498  if (isPositionIndependent())
499    return false;
500
501  // Otherwise we can do it.
502  return true;
503}
504
505//===----------------------------------------------------------------------===//
506//  Optimization Methods
507//===----------------------------------------------------------------------===//
508
509/// If the specified instruction has a constant integer operand and there are
510/// bits set in that constant that are not demanded, then clear those bits and
511/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    // Converts to bool: true iff the target installed a replacement node.
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle a constant (non-opaque) RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    // (An XOR whose constant covers all demanded bits behaves as a 'not' on
    // the bits anyone cares about.)
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    // The constant sets bits nobody demands: clear them and rebuild the op.
    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
557
558bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
559                                            const APInt &DemandedBits,
560                                            TargetLoweringOpt &TLO) const {
561  EVT VT = Op.getValueType();
562  APInt DemandedElts = VT.isVector()
563                           ? APInt::getAllOnes(VT.getVectorNumElements())
564                           : APInt(1, 1);
565  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
566}
567
568/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
569/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
570/// but it could be generalized for targets with other types of implicit
571/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      // Truncate both operands, perform the operation at the narrow width,
      // then widen with ANY_EXTEND: the bits above DemandedSize are not
      // demanded, so their (undefined) value does not matter.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
613
// DAG-combiner entry point: run SimplifyDemandedBits with a TLO built from
// the combiner's legalization state and commit any resulting replacement.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    // Revisit the (possibly replaced) node and apply the queued replacement.
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}
628
// Same as above, but with an explicit per-element demanded mask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    // Revisit the (possibly replaced) node and apply the queued replacement.
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}
645
// Overload that derives the demanded-elements mask from Op's type before
// forwarding to the full implementation.
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes.  This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
662
663// TODO: Under what circumstances can we create nodes? Constant folding?
664SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
665    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
666    SelectionDAG &DAG, unsigned Depth) const {
667  EVT VT = Op.getValueType();
668
669  // Limit search depth.
670  if (Depth >= SelectionDAG::MaxRecursionDepth)
671    return SDValue();
672
673  // Ignore UNDEFs.
674  if (Op.isUndef())
675    return SDValue();
676
677  // Not demanding any bits/elts from Op.
678  if (DemandedBits == 0 || DemandedElts == 0)
679    return DAG.getUNDEF(VT);
680
681  bool IsLE = DAG.getDataLayout().isLittleEndian();
682  unsigned NumElts = DemandedElts.getBitWidth();
683  unsigned BitWidth = DemandedBits.getBitWidth();
684  KnownBits LHSKnown, RHSKnown;
685  switch (Op.getOpcode()) {
686  case ISD::BITCAST: {
687    if (VT.isScalableVector())
688      return SDValue();
689
690    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
691    EVT SrcVT = Src.getValueType();
692    EVT DstVT = Op.getValueType();
693    if (SrcVT == DstVT)
694      return Src;
695
696    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
697    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
698    if (NumSrcEltBits == NumDstEltBits)
699      if (SDValue V = SimplifyMultipleUseDemandedBits(
700              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
701        return DAG.getBitcast(DstVT, V);
702
703    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
704      unsigned Scale = NumDstEltBits / NumSrcEltBits;
705      unsigned NumSrcElts = SrcVT.getVectorNumElements();
706      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
707      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
708      for (unsigned i = 0; i != Scale; ++i) {
709        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
710        unsigned BitOffset = EltOffset * NumSrcEltBits;
711        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
712        if (!Sub.isZero()) {
713          DemandedSrcBits |= Sub;
714          for (unsigned j = 0; j != NumElts; ++j)
715            if (DemandedElts[j])
716              DemandedSrcElts.setBit((j * Scale) + i);
717        }
718      }
719
720      if (SDValue V = SimplifyMultipleUseDemandedBits(
721              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722        return DAG.getBitcast(DstVT, V);
723    }
724
725    // TODO - bigendian once we have test coverage.
726    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
727      unsigned Scale = NumSrcEltBits / NumDstEltBits;
728      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
729      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
730      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
731      for (unsigned i = 0; i != NumElts; ++i)
732        if (DemandedElts[i]) {
733          unsigned Offset = (i % Scale) * NumDstEltBits;
734          DemandedSrcBits.insertBits(DemandedBits, Offset);
735          DemandedSrcElts.setBit(i / Scale);
736        }
737
738      if (SDValue V = SimplifyMultipleUseDemandedBits(
739              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
740        return DAG.getBitcast(DstVT, V);
741    }
742
743    break;
744  }
745  case ISD::AND: {
746    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
747    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
748
749    // If all of the demanded bits are known 1 on one side, return the other.
750    // These bits cannot contribute to the result of the 'and' in this
751    // context.
752    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
753      return Op.getOperand(0);
754    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
755      return Op.getOperand(1);
756    break;
757  }
758  case ISD::OR: {
759    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761
762    // If all of the demanded bits are known zero on one side, return the
763    // other.  These bits cannot contribute to the result of the 'or' in this
764    // context.
765    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
766      return Op.getOperand(0);
767    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
768      return Op.getOperand(1);
769    break;
770  }
771  case ISD::XOR: {
772    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774
775    // If all of the demanded bits are known zero on one side, return the
776    // other.
777    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
778      return Op.getOperand(0);
779    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
780      return Op.getOperand(1);
781    break;
782  }
783  case ISD::SHL: {
784    // If we are only demanding sign bits then we can use the shift source
785    // directly.
786    if (const APInt *MaxSA =
787            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
788      SDValue Op0 = Op.getOperand(0);
789      unsigned ShAmt = MaxSA->getZExtValue();
790      unsigned NumSignBits =
791          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
792      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
793      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
794        return Op0;
795    }
796    break;
797  }
798  case ISD::SETCC: {
799    SDValue Op0 = Op.getOperand(0);
800    SDValue Op1 = Op.getOperand(1);
801    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
802    // If (1) we only need the sign-bit, (2) the setcc operands are the same
803    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
804    // -1, we may be able to bypass the setcc.
805    if (DemandedBits.isSignMask() &&
806        Op0.getScalarValueSizeInBits() == BitWidth &&
807        getBooleanContents(Op0.getValueType()) ==
808            BooleanContent::ZeroOrNegativeOneBooleanContent) {
809      // If we're testing X < 0, then this compare isn't needed - just use X!
810      // FIXME: We're limiting to integer types here, but this should also work
811      // if we don't care about FP signed-zero. The use of SETLT with FP means
812      // that we don't care about NaNs.
813      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
814          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
815        return Op0;
816    }
817    break;
818  }
819  case ISD::SIGN_EXTEND_INREG: {
820    // If none of the extended bits are demanded, eliminate the sextinreg.
821    SDValue Op0 = Op.getOperand(0);
822    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
823    unsigned ExBits = ExVT.getScalarSizeInBits();
824    if (DemandedBits.getActiveBits() <= ExBits &&
825        shouldRemoveRedundantExtend(Op))
826      return Op0;
827    // If the input is already sign extended, just drop the extension.
828    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
829    if (NumSignBits >= (BitWidth - ExBits + 1))
830      return Op0;
831    break;
832  }
833  case ISD::ANY_EXTEND_VECTOR_INREG:
834  case ISD::SIGN_EXTEND_VECTOR_INREG:
835  case ISD::ZERO_EXTEND_VECTOR_INREG: {
836    if (VT.isScalableVector())
837      return SDValue();
838
839    // If we only want the lowest element and none of extended bits, then we can
840    // return the bitcasted source vector.
841    SDValue Src = Op.getOperand(0);
842    EVT SrcVT = Src.getValueType();
843    EVT DstVT = Op.getValueType();
844    if (IsLE && DemandedElts == 1 &&
845        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
846        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
847      return DAG.getBitcast(DstVT, Src);
848    }
849    break;
850  }
851  case ISD::INSERT_VECTOR_ELT: {
852    if (VT.isScalableVector())
853      return SDValue();
854
855    // If we don't demand the inserted element, return the base vector.
856    SDValue Vec = Op.getOperand(0);
857    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
858    EVT VecVT = Vec.getValueType();
859    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
860        !DemandedElts[CIdx->getZExtValue()])
861      return Vec;
862    break;
863  }
864  case ISD::INSERT_SUBVECTOR: {
865    if (VT.isScalableVector())
866      return SDValue();
867
868    SDValue Vec = Op.getOperand(0);
869    SDValue Sub = Op.getOperand(1);
870    uint64_t Idx = Op.getConstantOperandVal(2);
871    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
872    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
873    // If we don't demand the inserted subvector, return the base vector.
874    if (DemandedSubElts == 0)
875      return Vec;
876    break;
877  }
878  case ISD::VECTOR_SHUFFLE: {
879    assert(!VT.isScalableVector());
880    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
881
882    // If all the demanded elts are from one operand and are inline,
883    // then we can use the operand directly.
884    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
885    for (unsigned i = 0; i != NumElts; ++i) {
886      int M = ShuffleMask[i];
887      if (M < 0 || !DemandedElts[i])
888        continue;
889      AllUndef = false;
890      IdentityLHS &= (M == (int)i);
891      IdentityRHS &= ((M - NumElts) == i);
892    }
893
894    if (AllUndef)
895      return DAG.getUNDEF(Op.getValueType());
896    if (IdentityLHS)
897      return Op.getOperand(0);
898    if (IdentityRHS)
899      return Op.getOperand(1);
900    break;
901  }
902  default:
903    // TODO: Probably okay to remove after audit; here to reduce change size
904    // in initial enablement patch for scalable vectors
905    if (VT.isScalableVector())
906      return SDValue();
907
908    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
909      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
910              Op, DemandedBits, DemandedElts, DAG, Depth))
911        return V;
912    break;
913  }
914  return SDValue();
915}
916
917SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
918    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
919    unsigned Depth) const {
920  EVT VT = Op.getValueType();
921  // Since the number of lanes in a scalable vector is unknown at compile time,
922  // we track one bit which is implicitly broadcast to all lanes.  This means
923  // that all lanes in a scalable vector are considered demanded.
924  APInt DemandedElts = VT.isFixedLengthVector()
925                           ? APInt::getAllOnes(VT.getVectorNumElements())
926                           : APInt(1, 1);
927  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
928                                         Depth);
929}
930
931SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
932    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
933    unsigned Depth) const {
934  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
935  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936                                         Depth);
937}
938
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
//      or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// Returns the replacement value on success, or an empty SDValue if the
// pattern does not match or no legal AVG operation type can be found.
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  // Try to match an inner add(Op1, Op2) plus outer operand Op3 where one of
  // Op2/Op3 is the +1 constant. On success, captures the two (possibly
  // extended) operands into ExtOpA/ExtOpB and records the inner add in Add2
  // so its overflow flags can be checked later.
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Check both commutations of the nested add for the +1 that distinguishes
  // avgceil from avgfloor. Note MatchOperands mutates ExtOpA/ExtOpB/Add2 as a
  // side effect when it succeeds.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bit for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  // NOTE: this local intentionally shadows the llvm::KnownBits class name; it
  // holds the count of known sign/zero bits usable for narrowing below.
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  // Subtract one so NumSigned reflects the sign bits that survive the add
  // (the sum of two N-sign-bit values has at least N-1 sign bits).
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  // Decide between the signed and unsigned AVG form, preferring whichever
  // gives more known bits (and hence a narrower legal type).
  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // A signed AVG of an unsigned shift is only valid if the demanded bits do
    // not include the sign bit.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
      return SDValue();
    // Both the outer add and (if present) the inner +1 add must be proven not
    // to overflow, otherwise widening to VT would change the result.
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Build the AVG node at the narrow type, then extend/truncate back to the
  // original type with the matching signedness.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}
1071
1072/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1073/// result of Op are ever used downstream. If we can use this information to
1074/// simplify Op, create a new simplified DAG node and return true, returning the
1075/// original and new nodes in Old and New. Otherwise, analyze the expression and
1076/// return a mask of Known bits for the expression (used to simplify the
1077/// caller).  The Known bits may only be accurate for those bits in the
1078/// OriginalDemandedBits and OriginalDemandedElts.
1079bool TargetLowering::SimplifyDemandedBits(
1080    SDValue Op, const APInt &OriginalDemandedBits,
1081    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1082    unsigned Depth, bool AssumeSingleUse) const {
1083  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1084  assert(Op.getScalarValueSizeInBits() == BitWidth &&
1085         "Mask size mismatches value type size!");
1086
1087  // Don't know anything.
1088  Known = KnownBits(BitWidth);
1089
1090  EVT VT = Op.getValueType();
1091  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1092  unsigned NumElts = OriginalDemandedElts.getBitWidth();
1093  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1094         "Unexpected vector size");
1095
1096  APInt DemandedBits = OriginalDemandedBits;
1097  APInt DemandedElts = OriginalDemandedElts;
1098  SDLoc dl(Op);
1099  auto &DL = TLO.DAG.getDataLayout();
1100
1101  // Undef operand.
1102  if (Op.isUndef())
1103    return false;
1104
1105  // We can't simplify target constants.
1106  if (Op.getOpcode() == ISD::TargetConstant)
1107    return false;
1108
1109  if (Op.getOpcode() == ISD::Constant) {
1110    // We know all of the bits for a constant!
1111    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1112    return false;
1113  }
1114
1115  if (Op.getOpcode() == ISD::ConstantFP) {
1116    // We know all of the bits for a floating point constant!
1117    Known = KnownBits::makeConstant(
1118        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1119    return false;
1120  }
1121
1122  // Other users may use these bits.
1123  bool HasMultiUse = false;
1124  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1125    if (Depth >= SelectionDAG::MaxRecursionDepth) {
1126      // Limit search depth.
1127      return false;
1128    }
1129    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1130    DemandedBits = APInt::getAllOnes(BitWidth);
1131    DemandedElts = APInt::getAllOnes(NumElts);
1132    HasMultiUse = true;
1133  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1134    // Not demanding any bits/elts from Op.
1135    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1136  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1137    // Limit search depth.
1138    return false;
1139  }
1140
1141  KnownBits Known2;
1142  switch (Op.getOpcode()) {
1143  case ISD::SCALAR_TO_VECTOR: {
1144    if (VT.isScalableVector())
1145      return false;
1146    if (!DemandedElts[0])
1147      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1148
1149    KnownBits SrcKnown;
1150    SDValue Src = Op.getOperand(0);
1151    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1152    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1153    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1154      return true;
1155
1156    // Upper elements are undef, so only get the knownbits if we just demand
1157    // the bottom element.
1158    if (DemandedElts == 1)
1159      Known = SrcKnown.anyextOrTrunc(BitWidth);
1160    break;
1161  }
1162  case ISD::BUILD_VECTOR:
1163    // Collect the known bits that are shared by every demanded element.
1164    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1165    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1166    return false; // Don't fall through, will infinitely loop.
1167  case ISD::SPLAT_VECTOR: {
1168    SDValue Scl = Op.getOperand(0);
1169    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1170    KnownBits KnownScl;
1171    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1172      return true;
1173
1174    // Implicitly truncate the bits to match the official semantics of
1175    // SPLAT_VECTOR.
1176    Known = KnownScl.trunc(BitWidth);
1177    break;
1178  }
1179  case ISD::LOAD: {
1180    auto *LD = cast<LoadSDNode>(Op);
1181    if (getTargetConstantFromLoad(LD)) {
1182      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1183      return false; // Don't fall through, will infinitely loop.
1184    }
1185    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1186      // If this is a ZEXTLoad and we are looking at the loaded value.
1187      EVT MemVT = LD->getMemoryVT();
1188      unsigned MemBits = MemVT.getScalarSizeInBits();
1189      Known.Zero.setBitsFrom(MemBits);
1190      return false; // Don't fall through, will infinitely loop.
1191    }
1192    break;
1193  }
1194  case ISD::INSERT_VECTOR_ELT: {
1195    if (VT.isScalableVector())
1196      return false;
1197    SDValue Vec = Op.getOperand(0);
1198    SDValue Scl = Op.getOperand(1);
1199    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1200    EVT VecVT = Vec.getValueType();
1201
1202    // If index isn't constant, assume we need all vector elements AND the
1203    // inserted element.
1204    APInt DemandedVecElts(DemandedElts);
1205    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1206      unsigned Idx = CIdx->getZExtValue();
1207      DemandedVecElts.clearBit(Idx);
1208
1209      // Inserted element is not required.
1210      if (!DemandedElts[Idx])
1211        return TLO.CombineTo(Op, Vec);
1212    }
1213
1214    KnownBits KnownScl;
1215    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1216    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1217    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1218      return true;
1219
1220    Known = KnownScl.anyextOrTrunc(BitWidth);
1221
1222    KnownBits KnownVec;
1223    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1224                             Depth + 1))
1225      return true;
1226
1227    if (!!DemandedVecElts)
1228      Known = Known.intersectWith(KnownVec);
1229
1230    return false;
1231  }
1232  case ISD::INSERT_SUBVECTOR: {
1233    if (VT.isScalableVector())
1234      return false;
1235    // Demand any elements from the subvector and the remainder from the src its
1236    // inserted into.
1237    SDValue Src = Op.getOperand(0);
1238    SDValue Sub = Op.getOperand(1);
1239    uint64_t Idx = Op.getConstantOperandVal(2);
1240    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1241    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1242    APInt DemandedSrcElts = DemandedElts;
1243    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1244
1245    KnownBits KnownSub, KnownSrc;
1246    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1247                             Depth + 1))
1248      return true;
1249    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1250                             Depth + 1))
1251      return true;
1252
1253    Known.Zero.setAllBits();
1254    Known.One.setAllBits();
1255    if (!!DemandedSubElts)
1256      Known = Known.intersectWith(KnownSub);
1257    if (!!DemandedSrcElts)
1258      Known = Known.intersectWith(KnownSrc);
1259
1260    // Attempt to avoid multi-use src if we don't need anything from it.
1261    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1262        !DemandedSrcElts.isAllOnes()) {
1263      SDValue NewSub = SimplifyMultipleUseDemandedBits(
1264          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1265      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1266          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1267      if (NewSub || NewSrc) {
1268        NewSub = NewSub ? NewSub : Sub;
1269        NewSrc = NewSrc ? NewSrc : Src;
1270        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1271                                        Op.getOperand(2));
1272        return TLO.CombineTo(Op, NewOp);
1273      }
1274    }
1275    break;
1276  }
1277  case ISD::EXTRACT_SUBVECTOR: {
1278    if (VT.isScalableVector())
1279      return false;
1280    // Offset the demanded elts by the subvector index.
1281    SDValue Src = Op.getOperand(0);
1282    if (Src.getValueType().isScalableVector())
1283      break;
1284    uint64_t Idx = Op.getConstantOperandVal(1);
1285    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1286    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1287
1288    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1289                             Depth + 1))
1290      return true;
1291
1292    // Attempt to avoid multi-use src if we don't need anything from it.
1293    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1294      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1295          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1296      if (DemandedSrc) {
1297        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1298                                        Op.getOperand(1));
1299        return TLO.CombineTo(Op, NewOp);
1300      }
1301    }
1302    break;
1303  }
1304  case ISD::CONCAT_VECTORS: {
1305    if (VT.isScalableVector())
1306      return false;
1307    Known.Zero.setAllBits();
1308    Known.One.setAllBits();
1309    EVT SubVT = Op.getOperand(0).getValueType();
1310    unsigned NumSubVecs = Op.getNumOperands();
1311    unsigned NumSubElts = SubVT.getVectorNumElements();
1312    for (unsigned i = 0; i != NumSubVecs; ++i) {
1313      APInt DemandedSubElts =
1314          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1315      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1316                               Known2, TLO, Depth + 1))
1317        return true;
1318      // Known bits are shared by every demanded subvector element.
1319      if (!!DemandedSubElts)
1320        Known = Known.intersectWith(Known2);
1321    }
1322    break;
1323  }
1324  case ISD::VECTOR_SHUFFLE: {
1325    assert(!VT.isScalableVector());
1326    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1327
1328    // Collect demanded elements from shuffle operands..
1329    APInt DemandedLHS, DemandedRHS;
1330    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1331                                DemandedRHS))
1332      break;
1333
1334    if (!!DemandedLHS || !!DemandedRHS) {
1335      SDValue Op0 = Op.getOperand(0);
1336      SDValue Op1 = Op.getOperand(1);
1337
1338      Known.Zero.setAllBits();
1339      Known.One.setAllBits();
1340      if (!!DemandedLHS) {
1341        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1342                                 Depth + 1))
1343          return true;
1344        Known = Known.intersectWith(Known2);
1345      }
1346      if (!!DemandedRHS) {
1347        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1348                                 Depth + 1))
1349          return true;
1350        Known = Known.intersectWith(Known2);
1351      }
1352
1353      // Attempt to avoid multi-use ops if we don't need anything from them.
1354      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1355          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1356      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1357          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1358      if (DemandedOp0 || DemandedOp1) {
1359        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1360        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1361        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1362        return TLO.CombineTo(Op, NewOp);
1363      }
1364    }
1365    break;
1366  }
1367  case ISD::AND: {
1368    SDValue Op0 = Op.getOperand(0);
1369    SDValue Op1 = Op.getOperand(1);
1370
1371    // If the RHS is a constant, check to see if the LHS would be zero without
1372    // using the bits from the RHS.  Below, we use knowledge about the RHS to
1373    // simplify the LHS, here we're using information from the LHS to simplify
1374    // the RHS.
1375    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1376      // Do not increment Depth here; that can cause an infinite loop.
1377      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1378      // If the LHS already has zeros where RHSC does, this 'and' is dead.
1379      if ((LHSKnown.Zero & DemandedBits) ==
1380          (~RHSC->getAPIntValue() & DemandedBits))
1381        return TLO.CombineTo(Op, Op0);
1382
1383      // If any of the set bits in the RHS are known zero on the LHS, shrink
1384      // the constant.
1385      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1386                                 DemandedElts, TLO))
1387        return true;
1388
1389      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1390      // constant, but if this 'and' is only clearing bits that were just set by
1391      // the xor, then this 'and' can be eliminated by shrinking the mask of
1392      // the xor. For example, for a 32-bit X:
1393      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1394      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1395          LHSKnown.One == ~RHSC->getAPIntValue()) {
1396        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1397        return TLO.CombineTo(Op, Xor);
1398      }
1399    }
1400
1401    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1402    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1403    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1404        (Op0.getOperand(0).isUndef() ||
1405         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
1406        Op0->hasOneUse()) {
1407      unsigned NumSubElts =
1408          Op0.getOperand(1).getValueType().getVectorNumElements();
1409      unsigned SubIdx = Op0.getConstantOperandVal(2);
1410      APInt DemandedSub =
1411          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1412      KnownBits KnownSubMask =
1413          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1414      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1415        SDValue NewAnd =
1416            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1417        SDValue NewInsert =
1418            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1419                            Op0.getOperand(1), Op0.getOperand(2));
1420        return TLO.CombineTo(Op, NewInsert);
1421      }
1422    }
1423
1424    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1425                             Depth + 1))
1426      return true;
1427    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1428    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1429                             Known2, TLO, Depth + 1))
1430      return true;
1431    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1432
1433    // If all of the demanded bits are known one on one side, return the other.
1434    // These bits cannot contribute to the result of the 'and'.
1435    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1436      return TLO.CombineTo(Op, Op0);
1437    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1438      return TLO.CombineTo(Op, Op1);
1439    // If all of the demanded bits in the inputs are known zeros, return zero.
1440    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1441      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1442    // If the RHS is a constant, see if we can simplify it.
1443    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1444                               TLO))
1445      return true;
1446    // If the operation can be done in a smaller type, do so.
1447    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1448      return true;
1449
1450    // Attempt to avoid multi-use ops if we don't need anything from them.
1451    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1452      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1453          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1454      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1455          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1456      if (DemandedOp0 || DemandedOp1) {
1457        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1458        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1459        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1460        return TLO.CombineTo(Op, NewOp);
1461      }
1462    }
1463
1464    Known &= Known2;
1465    break;
1466  }
1467  case ISD::OR: {
1468    SDValue Op0 = Op.getOperand(0);
1469    SDValue Op1 = Op.getOperand(1);
1470    SDNodeFlags Flags = Op.getNode()->getFlags();
1471    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1472                             Depth + 1)) {
1473      if (Flags.hasDisjoint()) {
1474        Flags.setDisjoint(false);
1475        Op->setFlags(Flags);
1476      }
1477      return true;
1478    }
1479    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1480    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1481                             Known2, TLO, Depth + 1)) {
1482      if (Flags.hasDisjoint()) {
1483        Flags.setDisjoint(false);
1484        Op->setFlags(Flags);
1485      }
1486      return true;
1487    }
1488    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1489
1490    // If all of the demanded bits are known zero on one side, return the other.
1491    // These bits cannot contribute to the result of the 'or'.
1492    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1493      return TLO.CombineTo(Op, Op0);
1494    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1495      return TLO.CombineTo(Op, Op1);
1496    // If the RHS is a constant, see if we can simplify it.
1497    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1498      return true;
1499    // If the operation can be done in a smaller type, do so.
1500    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1501      return true;
1502
1503    // Attempt to avoid multi-use ops if we don't need anything from them.
1504    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1505      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1506          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1507      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1508          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1509      if (DemandedOp0 || DemandedOp1) {
1510        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1511        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1512        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1513        return TLO.CombineTo(Op, NewOp);
1514      }
1515    }
1516
1517    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1518    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1519    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1520        Op0->hasOneUse() && Op1->hasOneUse()) {
1521      // Attempt to match all commutations - m_c_Or would've been useful!
1522      for (int I = 0; I != 2; ++I) {
1523        SDValue X = Op.getOperand(I).getOperand(0);
1524        SDValue C1 = Op.getOperand(I).getOperand(1);
1525        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1526        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1527        if (Alt.getOpcode() == ISD::OR) {
1528          for (int J = 0; J != 2; ++J) {
1529            if (X == Alt.getOperand(J)) {
1530              SDValue Y = Alt.getOperand(1 - J);
1531              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1532                                                               {C1, C2})) {
1533                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1534                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1535                return TLO.CombineTo(
1536                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1537              }
1538            }
1539          }
1540        }
1541      }
1542    }
1543
1544    Known |= Known2;
1545    break;
1546  }
1547  case ISD::XOR: {
1548    SDValue Op0 = Op.getOperand(0);
1549    SDValue Op1 = Op.getOperand(1);
1550
1551    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1552                             Depth + 1))
1553      return true;
1554    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1555    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1556                             Depth + 1))
1557      return true;
1558    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1559
1560    // If all of the demanded bits are known zero on one side, return the other.
1561    // These bits cannot contribute to the result of the 'xor'.
1562    if (DemandedBits.isSubsetOf(Known.Zero))
1563      return TLO.CombineTo(Op, Op0);
1564    if (DemandedBits.isSubsetOf(Known2.Zero))
1565      return TLO.CombineTo(Op, Op1);
1566    // If the operation can be done in a smaller type, do so.
1567    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1568      return true;
1569
1570    // If all of the unknown bits are known to be zero on one side or the other
1571    // turn this into an *inclusive* or.
1572    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1573    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1574      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1575
1576    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1577    if (C) {
1578      // If one side is a constant, and all of the set bits in the constant are
1579      // also known set on the other side, turn this into an AND, as we know
1580      // the bits will be cleared.
1581      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1582      // NB: it is okay if more bits are known than are requested
1583      if (C->getAPIntValue() == Known2.One) {
1584        SDValue ANDC =
1585            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1586        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1587      }
1588
1589      // If the RHS is a constant, see if we can change it. Don't alter a -1
1590      // constant because that's a 'not' op, and that is better for combining
1591      // and codegen.
1592      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1593        // We're flipping all demanded bits. Flip the undemanded bits too.
1594        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1595        return TLO.CombineTo(Op, New);
1596      }
1597
1598      unsigned Op0Opcode = Op0.getOpcode();
1599      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1600        if (ConstantSDNode *ShiftC =
1601                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1602          // Don't crash on an oversized shift. We can not guarantee that a
1603          // bogus shift has been simplified to undef.
1604          if (ShiftC->getAPIntValue().ult(BitWidth)) {
1605            uint64_t ShiftAmt = ShiftC->getZExtValue();
1606            APInt Ones = APInt::getAllOnes(BitWidth);
1607            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1608                                         : Ones.lshr(ShiftAmt);
1609            const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1610            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1611                TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1612              // If the xor constant is a demanded mask, do a 'not' before the
1613              // shift:
1614              // xor (X << ShiftC), XorC --> (not X) << ShiftC
1615              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1616              SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1617              return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1618                                                       Op0.getOperand(1)));
1619            }
1620          }
1621        }
1622      }
1623    }
1624
1625    // If we can't turn this into a 'not', try to shrink the constant.
1626    if (!C || !C->isAllOnes())
1627      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1628        return true;
1629
1630    // Attempt to avoid multi-use ops if we don't need anything from them.
1631    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1632      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1633          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1634      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1635          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1636      if (DemandedOp0 || DemandedOp1) {
1637        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1638        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1639        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1640        return TLO.CombineTo(Op, NewOp);
1641      }
1642    }
1643
1644    Known ^= Known2;
1645    break;
1646  }
1647  case ISD::SELECT:
1648    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1649                             Known, TLO, Depth + 1))
1650      return true;
1651    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1652                             Known2, TLO, Depth + 1))
1653      return true;
1654    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1655    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1656
1657    // If the operands are constants, see if we can simplify them.
1658    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1659      return true;
1660
1661    // Only known if known in both the LHS and RHS.
1662    Known = Known.intersectWith(Known2);
1663    break;
1664  case ISD::VSELECT:
1665    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1666                             Known, TLO, Depth + 1))
1667      return true;
1668    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1669                             Known2, TLO, Depth + 1))
1670      return true;
1671    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1672    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1673
1674    // Only known if known in both the LHS and RHS.
1675    Known = Known.intersectWith(Known2);
1676    break;
1677  case ISD::SELECT_CC:
1678    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1679                             Known, TLO, Depth + 1))
1680      return true;
1681    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1682                             Known2, TLO, Depth + 1))
1683      return true;
1684    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1685    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1686
1687    // If the operands are constants, see if we can simplify them.
1688    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1689      return true;
1690
1691    // Only known if known in both the LHS and RHS.
1692    Known = Known.intersectWith(Known2);
1693    break;
1694  case ISD::SETCC: {
1695    SDValue Op0 = Op.getOperand(0);
1696    SDValue Op1 = Op.getOperand(1);
1697    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1698    // If (1) we only need the sign-bit, (2) the setcc operands are the same
1699    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1700    // -1, we may be able to bypass the setcc.
1701    if (DemandedBits.isSignMask() &&
1702        Op0.getScalarValueSizeInBits() == BitWidth &&
1703        getBooleanContents(Op0.getValueType()) ==
1704            BooleanContent::ZeroOrNegativeOneBooleanContent) {
1705      // If we're testing X < 0, then this compare isn't needed - just use X!
1706      // FIXME: We're limiting to integer types here, but this should also work
1707      // if we don't care about FP signed-zero. The use of SETLT with FP means
1708      // that we don't care about NaNs.
1709      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1710          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1711        return TLO.CombineTo(Op, Op0);
1712
1713      // TODO: Should we check for other forms of sign-bit comparisons?
1714      // Examples: X <= -1, X >= 0
1715    }
1716    if (getBooleanContents(Op0.getValueType()) ==
1717            TargetLowering::ZeroOrOneBooleanContent &&
1718        BitWidth > 1)
1719      Known.Zero.setBitsFrom(1);
1720    break;
1721  }
1722  case ISD::SHL: {
1723    SDValue Op0 = Op.getOperand(0);
1724    SDValue Op1 = Op.getOperand(1);
1725    EVT ShiftVT = Op1.getValueType();
1726
1727    if (const APInt *SA =
1728            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1729      unsigned ShAmt = SA->getZExtValue();
1730      if (ShAmt == 0)
1731        return TLO.CombineTo(Op, Op0);
1732
1733      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1734      // single shift.  We can do this if the bottom bits (which are shifted
1735      // out) are never demanded.
1736      // TODO - support non-uniform vector amounts.
1737      if (Op0.getOpcode() == ISD::SRL) {
1738        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1739          if (const APInt *SA2 =
1740                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1741            unsigned C1 = SA2->getZExtValue();
1742            unsigned Opc = ISD::SHL;
1743            int Diff = ShAmt - C1;
1744            if (Diff < 0) {
1745              Diff = -Diff;
1746              Opc = ISD::SRL;
1747            }
1748            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1749            return TLO.CombineTo(
1750                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1751          }
1752        }
1753      }
1754
1755      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1756      // are not demanded. This will likely allow the anyext to be folded away.
1757      // TODO - support non-uniform vector amounts.
1758      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1759        SDValue InnerOp = Op0.getOperand(0);
1760        EVT InnerVT = InnerOp.getValueType();
1761        unsigned InnerBits = InnerVT.getScalarSizeInBits();
1762        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1763            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1764          SDValue NarrowShl = TLO.DAG.getNode(
1765              ISD::SHL, dl, InnerVT, InnerOp,
1766              TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1767          return TLO.CombineTo(
1768              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1769        }
1770
1771        // Repeat the SHL optimization above in cases where an extension
1772        // intervenes: (shl (anyext (shr x, c1)), c2) to
1773        // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1774        // aren't demanded (as above) and that the shifted upper c1 bits of
1775        // x aren't demanded.
1776        // TODO - support non-uniform vector amounts.
1777        if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1778            InnerOp.hasOneUse()) {
1779          if (const APInt *SA2 =
1780                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1781            unsigned InnerShAmt = SA2->getZExtValue();
1782            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1783                DemandedBits.getActiveBits() <=
1784                    (InnerBits - InnerShAmt + ShAmt) &&
1785                DemandedBits.countr_zero() >= ShAmt) {
1786              SDValue NewSA =
1787                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1788              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1789                                               InnerOp.getOperand(0));
1790              return TLO.CombineTo(
1791                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1792            }
1793          }
1794        }
1795      }
1796
1797      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1798      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1799                               Depth + 1)) {
1800        SDNodeFlags Flags = Op.getNode()->getFlags();
1801        if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1802          // Disable the nsw and nuw flags. We can no longer guarantee that we
1803          // won't wrap after simplification.
1804          Flags.setNoSignedWrap(false);
1805          Flags.setNoUnsignedWrap(false);
1806          Op->setFlags(Flags);
1807        }
1808        return true;
1809      }
1810      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1811      Known.Zero <<= ShAmt;
1812      Known.One <<= ShAmt;
1813      // low bits known zero.
1814      Known.Zero.setLowBits(ShAmt);
1815
1816      // Attempt to avoid multi-use ops if we don't need anything from them.
1817      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1818        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1819            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1820        if (DemandedOp0) {
1821          SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1822          return TLO.CombineTo(Op, NewOp);
1823        }
1824      }
1825
1826      // Try shrinking the operation as long as the shift amount will still be
1827      // in range.
1828      if ((ShAmt < DemandedBits.getActiveBits()) &&
1829          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1830        return true;
1831
1832      // Narrow shift to lower half - similar to ShrinkDemandedOp.
1833      // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1834      // Only do this if we demand the upper half so the knownbits are correct.
1835      unsigned HalfWidth = BitWidth / 2;
1836      if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1837          DemandedBits.countLeadingOnes() >= HalfWidth) {
1838        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1839        if (isNarrowingProfitable(VT, HalfVT) &&
1840            isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1841            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1842            (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1843          // If we're demanding the upper bits at all, we must ensure
1844          // that the upper bits of the shift result are known to be zero,
1845          // which is equivalent to the narrow shift being NUW.
1846          if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1847            bool IsNSW = Known.countMinSignBits() > HalfWidth;
1848            SDNodeFlags Flags;
1849            Flags.setNoSignedWrap(IsNSW);
1850            Flags.setNoUnsignedWrap(IsNUW);
1851            SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1852            SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1853                ShAmt, HalfVT, dl, TLO.LegalTypes());
1854            SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1855                                               NewShiftAmt, Flags);
1856            SDValue NewExt =
1857                TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1858            return TLO.CombineTo(Op, NewExt);
1859          }
1860        }
1861      }
1862    } else {
1863      // This is a variable shift, so we can't shift the demand mask by a known
1864      // amount. But if we are not demanding high bits, then we are not
1865      // demanding those bits from the pre-shifted operand either.
1866      if (unsigned CTLZ = DemandedBits.countl_zero()) {
1867        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1868        if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1869                                 Depth + 1)) {
1870          SDNodeFlags Flags = Op.getNode()->getFlags();
1871          if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1872            // Disable the nsw and nuw flags. We can no longer guarantee that we
1873            // won't wrap after simplification.
1874            Flags.setNoSignedWrap(false);
1875            Flags.setNoUnsignedWrap(false);
1876            Op->setFlags(Flags);
1877          }
1878          return true;
1879        }
1880        Known.resetAll();
1881      }
1882    }
1883
1884    // If we are only demanding sign bits then we can use the shift source
1885    // directly.
1886    if (const APInt *MaxSA =
1887            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1888      unsigned ShAmt = MaxSA->getZExtValue();
1889      unsigned NumSignBits =
1890          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1891      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1892      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1893        return TLO.CombineTo(Op, Op0);
1894    }
1895    break;
1896  }
1897  case ISD::SRL: {
1898    SDValue Op0 = Op.getOperand(0);
1899    SDValue Op1 = Op.getOperand(1);
1900    EVT ShiftVT = Op1.getValueType();
1901
1902    // Try to match AVG patterns.
1903    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1904                                        DemandedElts, Depth + 1))
1905      return TLO.CombineTo(Op, AVG);
1906
1907    if (const APInt *SA =
1908            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1909      unsigned ShAmt = SA->getZExtValue();
1910      if (ShAmt == 0)
1911        return TLO.CombineTo(Op, Op0);
1912
1913      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1914      // single shift.  We can do this if the top bits (which are shifted out)
1915      // are never demanded.
1916      // TODO - support non-uniform vector amounts.
1917      if (Op0.getOpcode() == ISD::SHL) {
1918        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1919          if (const APInt *SA2 =
1920                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1921            unsigned C1 = SA2->getZExtValue();
1922            unsigned Opc = ISD::SRL;
1923            int Diff = ShAmt - C1;
1924            if (Diff < 0) {
1925              Diff = -Diff;
1926              Opc = ISD::SHL;
1927            }
1928            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1929            return TLO.CombineTo(
1930                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1931          }
1932        }
1933      }
1934
1935      APInt InDemandedMask = (DemandedBits << ShAmt);
1936
1937      // If the shift is exact, then it does demand the low bits (and knows that
1938      // they are zero).
1939      if (Op->getFlags().hasExact())
1940        InDemandedMask.setLowBits(ShAmt);
1941
1942      // Narrow shift to lower half - similar to ShrinkDemandedOp.
1943      // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1944      if ((BitWidth % 2) == 0 && !VT.isVector()) {
1945        APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
1946        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1947        if (isNarrowingProfitable(VT, HalfVT) &&
1948            isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1949            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1950            (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1951            ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1952             TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1953          SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1954          SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1955              ShAmt, HalfVT, dl, TLO.LegalTypes());
1956          SDValue NewShift =
1957              TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1958          return TLO.CombineTo(
1959              Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1960        }
1961      }
1962
1963      // Compute the new bits that are at the top now.
1964      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1965                               Depth + 1))
1966        return true;
1967      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1968      Known.Zero.lshrInPlace(ShAmt);
1969      Known.One.lshrInPlace(ShAmt);
1970      // High bits known zero.
1971      Known.Zero.setHighBits(ShAmt);
1972
1973      // Attempt to avoid multi-use ops if we don't need anything from them.
1974      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1975        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1976            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1977        if (DemandedOp0) {
1978          SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1979          return TLO.CombineTo(Op, NewOp);
1980        }
1981      }
1982    } else {
1983      // Use generic knownbits computation as it has support for non-uniform
1984      // shift amounts.
1985      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1986    }
1987    break;
1988  }
1989  case ISD::SRA: {
1990    SDValue Op0 = Op.getOperand(0);
1991    SDValue Op1 = Op.getOperand(1);
1992    EVT ShiftVT = Op1.getValueType();
1993
1994    // If we only want bits that already match the signbit then we don't need
1995    // to shift.
1996    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1997    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1998        NumHiDemandedBits)
1999      return TLO.CombineTo(Op, Op0);
2000
2001    // If this is an arithmetic shift right and only the low-bit is set, we can
2002    // always convert this into a logical shr, even if the shift amount is
2003    // variable.  The low bit of the shift cannot be an input sign bit unless
2004    // the shift amount is >= the size of the datatype, which is undefined.
2005    if (DemandedBits.isOne())
2006      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2007
2008    // Try to match AVG patterns.
2009    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
2010                                        DemandedElts, Depth + 1))
2011      return TLO.CombineTo(Op, AVG);
2012
2013    if (const APInt *SA =
2014            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2015      unsigned ShAmt = SA->getZExtValue();
2016      if (ShAmt == 0)
2017        return TLO.CombineTo(Op, Op0);
2018
2019      // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2020      // supports sext_inreg.
2021      if (Op0.getOpcode() == ISD::SHL) {
2022        if (const APInt *InnerSA =
2023                TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2024          unsigned LowBits = BitWidth - ShAmt;
2025          EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2026          if (VT.isVector())
2027            ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2028                                     VT.getVectorElementCount());
2029
2030          if (*InnerSA == ShAmt) {
2031            if (!TLO.LegalOperations() ||
2032                getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
2033              return TLO.CombineTo(
2034                  Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2035                                      Op0.getOperand(0),
2036                                      TLO.DAG.getValueType(ExtVT)));
2037
2038            // Even if we can't convert to sext_inreg, we might be able to
2039            // remove this shift pair if the input is already sign extended.
2040            unsigned NumSignBits =
2041                TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2042            if (NumSignBits > ShAmt)
2043              return TLO.CombineTo(Op, Op0.getOperand(0));
2044          }
2045        }
2046      }
2047
2048      APInt InDemandedMask = (DemandedBits << ShAmt);
2049
2050      // If the shift is exact, then it does demand the low bits (and knows that
2051      // they are zero).
2052      if (Op->getFlags().hasExact())
2053        InDemandedMask.setLowBits(ShAmt);
2054
2055      // If any of the demanded bits are produced by the sign extension, we also
2056      // demand the input sign bit.
2057      if (DemandedBits.countl_zero() < ShAmt)
2058        InDemandedMask.setSignBit();
2059
2060      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2061                               Depth + 1))
2062        return true;
2063      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2064      Known.Zero.lshrInPlace(ShAmt);
2065      Known.One.lshrInPlace(ShAmt);
2066
2067      // If the input sign bit is known to be zero, or if none of the top bits
2068      // are demanded, turn this into an unsigned shift right.
2069      if (Known.Zero[BitWidth - ShAmt - 1] ||
2070          DemandedBits.countl_zero() >= ShAmt) {
2071        SDNodeFlags Flags;
2072        Flags.setExact(Op->getFlags().hasExact());
2073        return TLO.CombineTo(
2074            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2075      }
2076
2077      int Log2 = DemandedBits.exactLogBase2();
2078      if (Log2 >= 0) {
2079        // The bit must come from the sign.
2080        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2081        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2082      }
2083
2084      if (Known.One[BitWidth - ShAmt - 1])
2085        // New bits are known one.
2086        Known.One.setHighBits(ShAmt);
2087
2088      // Attempt to avoid multi-use ops if we don't need anything from them.
2089      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2090        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2091            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2092        if (DemandedOp0) {
2093          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2094          return TLO.CombineTo(Op, NewOp);
2095        }
2096      }
2097    }
2098    break;
2099  }
2100  case ISD::FSHL:
2101  case ISD::FSHR: {
2102    SDValue Op0 = Op.getOperand(0);
2103    SDValue Op1 = Op.getOperand(1);
2104    SDValue Op2 = Op.getOperand(2);
2105    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2106
2107    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2108      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2109
2110      // For fshl, 0-shift returns the 1st arg.
2111      // For fshr, 0-shift returns the 2nd arg.
2112      if (Amt == 0) {
2113        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2114                                 Known, TLO, Depth + 1))
2115          return true;
2116        break;
2117      }
2118
2119      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2120      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2121      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2122      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2123      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2124                               Depth + 1))
2125        return true;
2126      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2127                               Depth + 1))
2128        return true;
2129
2130      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2131      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2132      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2133      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2134      Known = Known.unionWith(Known2);
2135
2136      // Attempt to avoid multi-use ops if we don't need anything from them.
2137      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2138          !DemandedElts.isAllOnes()) {
2139        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2140            Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2141        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2142            Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2143        if (DemandedOp0 || DemandedOp1) {
2144          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2145          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2146          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2147                                          DemandedOp1, Op2);
2148          return TLO.CombineTo(Op, NewOp);
2149        }
2150      }
2151    }
2152
2153    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2154    if (isPowerOf2_32(BitWidth)) {
2155      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2156      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2157                               Known2, TLO, Depth + 1))
2158        return true;
2159    }
2160    break;
2161  }
2162  case ISD::ROTL:
2163  case ISD::ROTR: {
2164    SDValue Op0 = Op.getOperand(0);
2165    SDValue Op1 = Op.getOperand(1);
2166    bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2167
2168    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2169    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2170      return TLO.CombineTo(Op, Op0);
2171
2172    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2173      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2174      unsigned RevAmt = BitWidth - Amt;
2175
2176      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2177      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2178      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2179      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2180                               Depth + 1))
2181        return true;
2182
2183      // rot*(x, 0) --> x
2184      if (Amt == 0)
2185        return TLO.CombineTo(Op, Op0);
2186
2187      // See if we don't demand either half of the rotated bits.
2188      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2189          DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2190        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2191        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2192      }
2193      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2194          DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2195        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2196        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2197      }
2198    }
2199
2200    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2201    if (isPowerOf2_32(BitWidth)) {
2202      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2203      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2204                               Depth + 1))
2205        return true;
2206    }
2207    break;
2208  }
2209  case ISD::SMIN:
2210  case ISD::SMAX:
2211  case ISD::UMIN:
2212  case ISD::UMAX: {
2213    unsigned Opc = Op.getOpcode();
2214    SDValue Op0 = Op.getOperand(0);
2215    SDValue Op1 = Op.getOperand(1);
2216
2217    // If we're only demanding signbits, then we can simplify to OR/AND node.
2218    unsigned BitOp =
2219        (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2220    unsigned NumSignBits =
2221        std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2222                 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2223    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2224    if (NumSignBits >= NumDemandedUpperBits)
2225      return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2226
2227    // Check if one arg is always less/greater than (or equal) to the other arg.
2228    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2229    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2230    switch (Opc) {
2231    case ISD::SMIN:
2232      if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2233        return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2234      if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2235        return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2236      Known = KnownBits::smin(Known0, Known1);
2237      break;
2238    case ISD::SMAX:
2239      if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2240        return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2241      if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2242        return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2243      Known = KnownBits::smax(Known0, Known1);
2244      break;
2245    case ISD::UMIN:
2246      if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2247        return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2248      if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2249        return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2250      Known = KnownBits::umin(Known0, Known1);
2251      break;
2252    case ISD::UMAX:
2253      if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2254        return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2255      if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2256        return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2257      Known = KnownBits::umax(Known0, Known1);
2258      break;
2259    }
2260    break;
2261  }
2262  case ISD::BITREVERSE: {
2263    SDValue Src = Op.getOperand(0);
2264    APInt DemandedSrcBits = DemandedBits.reverseBits();
2265    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2266                             Depth + 1))
2267      return true;
2268    Known.One = Known2.One.reverseBits();
2269    Known.Zero = Known2.Zero.reverseBits();
2270    break;
2271  }
2272  case ISD::BSWAP: {
2273    SDValue Src = Op.getOperand(0);
2274
2275    // If the only bits demanded come from one byte of the bswap result,
2276    // just shift the input byte into position to eliminate the bswap.
2277    unsigned NLZ = DemandedBits.countl_zero();
2278    unsigned NTZ = DemandedBits.countr_zero();
2279
2280    // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
2281    // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
2282    // have 14 leading zeros, round to 8.
2283    NLZ = alignDown(NLZ, 8);
2284    NTZ = alignDown(NTZ, 8);
2285    // If we need exactly one byte, we can do this transformation.
2286    if (BitWidth - NLZ - NTZ == 8) {
2287      // Replace this with either a left or right shift to get the byte into
2288      // the right place.
2289      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2290      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2291        EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2292        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2293        SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2294        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2295        return TLO.CombineTo(Op, NewOp);
2296      }
2297    }
2298
2299    APInt DemandedSrcBits = DemandedBits.byteSwap();
2300    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2301                             Depth + 1))
2302      return true;
2303    Known.One = Known2.One.byteSwap();
2304    Known.Zero = Known2.Zero.byteSwap();
2305    break;
2306  }
2307  case ISD::CTPOP: {
2308    // If only 1 bit is demanded, replace with PARITY as long as we're before
2309    // op legalization.
2310    // FIXME: Limit to scalars for now.
2311    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2312      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2313                                               Op.getOperand(0)));
2314
2315    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2316    break;
2317  }
2318  case ISD::SIGN_EXTEND_INREG: {
2319    SDValue Op0 = Op.getOperand(0);
2320    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2321    unsigned ExVTBits = ExVT.getScalarSizeInBits();
2322
2323    // If we only care about the highest bit, don't bother shifting right.
2324    if (DemandedBits.isSignMask()) {
2325      unsigned MinSignedBits =
2326          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2327      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2328      // However if the input is already sign extended we expect the sign
2329      // extension to be dropped altogether later and do not simplify.
2330      if (!AlreadySignExtended) {
2331        // Compute the correct shift amount type, which must be getShiftAmountTy
2332        // for scalar types after legalization.
2333        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2334                                               getShiftAmountTy(VT, DL));
2335        return TLO.CombineTo(Op,
2336                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2337      }
2338    }
2339
2340    // If none of the extended bits are demanded, eliminate the sextinreg.
2341    if (DemandedBits.getActiveBits() <= ExVTBits)
2342      return TLO.CombineTo(Op, Op0);
2343
2344    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2345
2346    // Since the sign extended bits are demanded, we know that the sign
2347    // bit is demanded.
2348    InputDemandedBits.setBit(ExVTBits - 1);
2349
2350    if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2351                             Depth + 1))
2352      return true;
2353    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2354
2355    // If the sign bit of the input is known set or clear, then we know the
2356    // top bits of the result.
2357
2358    // If the input sign bit is known zero, convert this into a zero extension.
2359    if (Known.Zero[ExVTBits - 1])
2360      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2361
2362    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2363    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2364      Known.One.setBitsFrom(ExVTBits);
2365      Known.Zero &= Mask;
2366    } else { // Input sign bit unknown
2367      Known.Zero &= Mask;
2368      Known.One &= Mask;
2369    }
2370    break;
2371  }
2372  case ISD::BUILD_PAIR: {
2373    EVT HalfVT = Op.getOperand(0).getValueType();
2374    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2375
2376    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2377    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2378
2379    KnownBits KnownLo, KnownHi;
2380
2381    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2382      return true;
2383
2384    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2385      return true;
2386
2387    Known = KnownHi.concat(KnownLo);
2388    break;
2389  }
2390  case ISD::ZERO_EXTEND_VECTOR_INREG:
2391    if (VT.isScalableVector())
2392      return false;
2393    [[fallthrough]];
2394  case ISD::ZERO_EXTEND: {
2395    SDValue Src = Op.getOperand(0);
2396    EVT SrcVT = Src.getValueType();
2397    unsigned InBits = SrcVT.getScalarSizeInBits();
2398    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2399    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2400
2401    // If none of the top bits are demanded, convert this into an any_extend.
2402    if (DemandedBits.getActiveBits() <= InBits) {
2403      // If we only need the non-extended bits of the bottom element
2404      // then we can just bitcast to the result.
2405      if (IsLE && IsVecInReg && DemandedElts == 1 &&
2406          VT.getSizeInBits() == SrcVT.getSizeInBits())
2407        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2408
2409      unsigned Opc =
2410          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2411      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2412        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2413    }
2414
2415    SDNodeFlags Flags = Op->getFlags();
2416    APInt InDemandedBits = DemandedBits.trunc(InBits);
2417    APInt InDemandedElts = DemandedElts.zext(InElts);
2418    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2419                             Depth + 1)) {
2420      if (Flags.hasNonNeg()) {
2421        Flags.setNonNeg(false);
2422        Op->setFlags(Flags);
2423      }
2424      return true;
2425    }
2426    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2427    assert(Known.getBitWidth() == InBits && "Src width has changed?");
2428    Known = Known.zext(BitWidth);
2429
2430    // Attempt to avoid multi-use ops if we don't need anything from them.
2431    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2432            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2433      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2434    break;
2435  }
2436  case ISD::SIGN_EXTEND_VECTOR_INREG:
2437    if (VT.isScalableVector())
2438      return false;
2439    [[fallthrough]];
2440  case ISD::SIGN_EXTEND: {
2441    SDValue Src = Op.getOperand(0);
2442    EVT SrcVT = Src.getValueType();
2443    unsigned InBits = SrcVT.getScalarSizeInBits();
2444    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2445    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2446
2447    APInt InDemandedElts = DemandedElts.zext(InElts);
2448    APInt InDemandedBits = DemandedBits.trunc(InBits);
2449
2450    // Since some of the sign extended bits are demanded, we know that the sign
2451    // bit is demanded.
2452    InDemandedBits.setBit(InBits - 1);
2453
2454    // If none of the top bits are demanded, convert this into an any_extend.
2455    if (DemandedBits.getActiveBits() <= InBits) {
2456      // If we only need the non-extended bits of the bottom element
2457      // then we can just bitcast to the result.
2458      if (IsLE && IsVecInReg && DemandedElts == 1 &&
2459          VT.getSizeInBits() == SrcVT.getSizeInBits())
2460        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2461
2462      // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2463      if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2464          TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2465              InBits) {
2466        unsigned Opc =
2467            IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2468        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2469          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2470      }
2471    }
2472
2473    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2474                             Depth + 1))
2475      return true;
2476    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2477    assert(Known.getBitWidth() == InBits && "Src width has changed?");
2478
2479    // If the sign bit is known one, the top bits match.
2480    Known = Known.sext(BitWidth);
2481
2482    // If the sign bit is known zero, convert this to a zero extend.
2483    if (Known.isNonNegative()) {
2484      unsigned Opc =
2485          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2486      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2487        SDNodeFlags Flags;
2488        if (!IsVecInReg)
2489          Flags.setNonNeg(true);
2490        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2491      }
2492    }
2493
2494    // Attempt to avoid multi-use ops if we don't need anything from them.
2495    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2496            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2497      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2498    break;
2499  }
2500  case ISD::ANY_EXTEND_VECTOR_INREG:
2501    if (VT.isScalableVector())
2502      return false;
2503    [[fallthrough]];
2504  case ISD::ANY_EXTEND: {
2505    SDValue Src = Op.getOperand(0);
2506    EVT SrcVT = Src.getValueType();
2507    unsigned InBits = SrcVT.getScalarSizeInBits();
2508    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2509    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2510
2511    // If we only need the bottom element then we can just bitcast.
2512    // TODO: Handle ANY_EXTEND?
2513    if (IsLE && IsVecInReg && DemandedElts == 1 &&
2514        VT.getSizeInBits() == SrcVT.getSizeInBits())
2515      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2516
2517    APInt InDemandedBits = DemandedBits.trunc(InBits);
2518    APInt InDemandedElts = DemandedElts.zext(InElts);
2519    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2520                             Depth + 1))
2521      return true;
2522    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2523    assert(Known.getBitWidth() == InBits && "Src width has changed?");
2524    Known = Known.anyext(BitWidth);
2525
2526    // Attempt to avoid multi-use ops if we don't need anything from them.
2527    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2528            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2529      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2530    break;
2531  }
2532  case ISD::TRUNCATE: {
2533    SDValue Src = Op.getOperand(0);
2534
2535    // Simplify the input, using demanded bit information, and compute the known
2536    // zero/one bits live out.
2537    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2538    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2539    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2540                             Depth + 1))
2541      return true;
2542    Known = Known.trunc(BitWidth);
2543
2544    // Attempt to avoid multi-use ops if we don't need anything from them.
2545    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2546            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2547      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2548
2549    // If the input is only used by this truncate, see if we can shrink it based
2550    // on the known demanded bits.
2551    switch (Src.getOpcode()) {
2552    default:
2553      break;
2554    case ISD::SRL:
2555      // Shrink SRL by a constant if none of the high bits shifted in are
2556      // demanded.
2557      if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2558        // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2559        // undesirable.
2560        break;
2561
2562      if (Src.getNode()->hasOneUse()) {
2563        const APInt *ShAmtC =
2564            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2565        if (!ShAmtC || ShAmtC->uge(BitWidth))
2566          break;
2567        uint64_t ShVal = ShAmtC->getZExtValue();
2568
2569        APInt HighBits =
2570            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2571        HighBits.lshrInPlace(ShVal);
2572        HighBits = HighBits.trunc(BitWidth);
2573
2574        if (!(HighBits & DemandedBits)) {
2575          // None of the shifted in bits are needed.  Add a truncate of the
2576          // shift input, then shift it.
2577          SDValue NewShAmt = TLO.DAG.getConstant(
2578              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2579          SDValue NewTrunc =
2580              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2581          return TLO.CombineTo(
2582              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2583        }
2584      }
2585      break;
2586    }
2587
2588    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2589    break;
2590  }
2591  case ISD::AssertZext: {
2592    // AssertZext demands all of the high bits, plus any of the low bits
2593    // demanded by its users.
2594    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2595    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2596    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2597                             TLO, Depth + 1))
2598      return true;
2599    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2600
2601    Known.Zero |= ~InMask;
2602    Known.One &= (~Known.Zero);
2603    break;
2604  }
2605  case ISD::EXTRACT_VECTOR_ELT: {
2606    SDValue Src = Op.getOperand(0);
2607    SDValue Idx = Op.getOperand(1);
2608    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2609    unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2610
2611    if (SrcEltCnt.isScalable())
2612      return false;
2613
2614    // Demand the bits from every vector element without a constant index.
2615    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2616    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2617    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2618      if (CIdx->getAPIntValue().ult(NumSrcElts))
2619        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2620
2621    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2622    // anything about the extended bits.
2623    APInt DemandedSrcBits = DemandedBits;
2624    if (BitWidth > EltBitWidth)
2625      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2626
2627    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2628                             Depth + 1))
2629      return true;
2630
2631    // Attempt to avoid multi-use ops if we don't need anything from them.
2632    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2633      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2634              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2635        SDValue NewOp =
2636            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2637        return TLO.CombineTo(Op, NewOp);
2638      }
2639    }
2640
2641    Known = Known2;
2642    if (BitWidth > EltBitWidth)
2643      Known = Known.anyext(BitWidth);
2644    break;
2645  }
2646  case ISD::BITCAST: {
2647    if (VT.isScalableVector())
2648      return false;
2649    SDValue Src = Op.getOperand(0);
2650    EVT SrcVT = Src.getValueType();
2651    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2652
2653    // If this is an FP->Int bitcast and if the sign bit is the only
2654    // thing demanded, turn this into a FGETSIGN.
2655    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2656        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2657        SrcVT.isFloatingPoint()) {
2658      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2659      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2660      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2661          SrcVT != MVT::f128) {
2662        // Cannot eliminate/lower SHL for f128 yet.
2663        EVT Ty = OpVTLegal ? VT : MVT::i32;
2664        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2665        // place.  We expect the SHL to be eliminated by other optimizations.
2666        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2667        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2668        if (!OpVTLegal && OpVTSizeInBits > 32)
2669          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2670        unsigned ShVal = Op.getValueSizeInBits() - 1;
2671        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2672        return TLO.CombineTo(Op,
2673                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2674      }
2675    }
2676
2677    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2678    // Demand the elt/bit if any of the original elts/bits are demanded.
2679    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2680      unsigned Scale = BitWidth / NumSrcEltBits;
2681      unsigned NumSrcElts = SrcVT.getVectorNumElements();
2682      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2683      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2684      for (unsigned i = 0; i != Scale; ++i) {
2685        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2686        unsigned BitOffset = EltOffset * NumSrcEltBits;
2687        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2688        if (!Sub.isZero()) {
2689          DemandedSrcBits |= Sub;
2690          for (unsigned j = 0; j != NumElts; ++j)
2691            if (DemandedElts[j])
2692              DemandedSrcElts.setBit((j * Scale) + i);
2693        }
2694      }
2695
2696      APInt KnownSrcUndef, KnownSrcZero;
2697      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2698                                     KnownSrcZero, TLO, Depth + 1))
2699        return true;
2700
2701      KnownBits KnownSrcBits;
2702      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2703                               KnownSrcBits, TLO, Depth + 1))
2704        return true;
2705    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2706      // TODO - bigendian once we have test coverage.
2707      unsigned Scale = NumSrcEltBits / BitWidth;
2708      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2709      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2710      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2711      for (unsigned i = 0; i != NumElts; ++i)
2712        if (DemandedElts[i]) {
2713          unsigned Offset = (i % Scale) * BitWidth;
2714          DemandedSrcBits.insertBits(DemandedBits, Offset);
2715          DemandedSrcElts.setBit(i / Scale);
2716        }
2717
2718      if (SrcVT.isVector()) {
2719        APInt KnownSrcUndef, KnownSrcZero;
2720        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2721                                       KnownSrcZero, TLO, Depth + 1))
2722          return true;
2723      }
2724
2725      KnownBits KnownSrcBits;
2726      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2727                               KnownSrcBits, TLO, Depth + 1))
2728        return true;
2729
2730      // Attempt to avoid multi-use ops if we don't need anything from them.
2731      if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2732        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2733                Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2734          SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2735          return TLO.CombineTo(Op, NewOp);
2736        }
2737      }
2738    }
2739
2740    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
2741    // recursive call where Known may be useful to the caller.
2742    if (Depth > 0) {
2743      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2744      return false;
2745    }
2746    break;
2747  }
2748  case ISD::MUL:
2749    if (DemandedBits.isPowerOf2()) {
2750      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2751      // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2752      // odd (has LSB set), then the left-shifted low bit of X is the answer.
2753      unsigned CTZ = DemandedBits.countr_zero();
2754      ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2755      if (C && C->getAPIntValue().countr_zero() == CTZ) {
2756        EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2757        SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2758        SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2759        return TLO.CombineTo(Op, Shl);
2760      }
2761    }
2762    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2763    // X * X is odd iff X is odd.
2764    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2765    if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2766      SDValue One = TLO.DAG.getConstant(1, dl, VT);
2767      SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2768      return TLO.CombineTo(Op, And1);
2769    }
2770    [[fallthrough]];
2771  case ISD::ADD:
2772  case ISD::SUB: {
2773    // Add, Sub, and Mul don't demand any bits in positions beyond that
2774    // of the highest bit demanded of them.
2775    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2776    SDNodeFlags Flags = Op.getNode()->getFlags();
2777    unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2778    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2779    KnownBits KnownOp0, KnownOp1;
2780    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2781                             Depth + 1) ||
2782        SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2783                             Depth + 1) ||
2784        // See if the operation should be performed at a smaller bit width.
2785        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2786      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2787        // Disable the nsw and nuw flags. We can no longer guarantee that we
2788        // won't wrap after simplification.
2789        Flags.setNoSignedWrap(false);
2790        Flags.setNoUnsignedWrap(false);
2791        Op->setFlags(Flags);
2792      }
2793      return true;
2794    }
2795
2796    // neg x with only low bit demanded is simply x.
2797    if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2798        isNullConstant(Op0))
2799      return TLO.CombineTo(Op, Op1);
2800
2801    // Attempt to avoid multi-use ops if we don't need anything from them.
2802    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2803      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2804          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2805      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2806          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2807      if (DemandedOp0 || DemandedOp1) {
2808        Flags.setNoSignedWrap(false);
2809        Flags.setNoUnsignedWrap(false);
2810        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2811        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2812        SDValue NewOp =
2813            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2814        return TLO.CombineTo(Op, NewOp);
2815      }
2816    }
2817
2818    // If we have a constant operand, we may be able to turn it into -1 if we
2819    // do not demand the high bits. This can make the constant smaller to
2820    // encode, allow more general folding, or match specialized instruction
2821    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2822    // is probably not useful (and could be detrimental).
2823    ConstantSDNode *C = isConstOrConstSplat(Op1);
2824    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2825    if (C && !C->isAllOnes() && !C->isOne() &&
2826        (C->getAPIntValue() | HighMask).isAllOnes()) {
2827      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2828      // Disable the nsw and nuw flags. We can no longer guarantee that we
2829      // won't wrap after simplification.
2830      Flags.setNoSignedWrap(false);
2831      Flags.setNoUnsignedWrap(false);
2832      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2833      return TLO.CombineTo(Op, NewOp);
2834    }
2835
2836    // Match a multiply with a disguised negated-power-of-2 and convert to a
2837    // an equivalent shift-left amount.
2838    // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2839    auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2840      if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2841        return 0;
2842
2843      // Don't touch opaque constants. Also, ignore zero and power-of-2
2844      // multiplies. Those will get folded later.
2845      ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2846      if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2847          !MulC->getAPIntValue().isPowerOf2()) {
2848        APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2849        if (UnmaskedC.isNegatedPowerOf2())
2850          return (-UnmaskedC).logBase2();
2851      }
2852      return 0;
2853    };
2854
2855    auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2856      EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2857      SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2858      SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2859      SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2860      return TLO.CombineTo(Op, Res);
2861    };
2862
2863    if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2864      if (Op.getOpcode() == ISD::ADD) {
2865        // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2866        if (unsigned ShAmt = getShiftLeftAmt(Op0))
2867          return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2868        // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2869        if (unsigned ShAmt = getShiftLeftAmt(Op1))
2870          return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2871      }
2872      if (Op.getOpcode() == ISD::SUB) {
2873        // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2874        if (unsigned ShAmt = getShiftLeftAmt(Op1))
2875          return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2876      }
2877    }
2878
2879    if (Op.getOpcode() == ISD::MUL) {
2880      Known = KnownBits::mul(KnownOp0, KnownOp1);
2881    } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2882      Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
2883                                          Flags.hasNoSignedWrap(), KnownOp0,
2884                                          KnownOp1);
2885    }
2886    break;
2887  }
2888  default:
2889    // We also ask the target about intrinsics (which could be specific to it).
2890    if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2891        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2892      // TODO: Probably okay to remove after audit; here to reduce change size
2893      // in initial enablement patch for scalable vectors
2894      if (Op.getValueType().isScalableVector())
2895        break;
2896      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2897                                            Known, TLO, Depth))
2898        return true;
2899      break;
2900    }
2901
2902    // Just use computeKnownBits to compute output bits.
2903    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2904    break;
2905  }
2906
2907  // If we know the value of all of the demanded bits, return this as a
2908  // constant.
2909  if (!isTargetCanonicalConstantNode(Op) &&
2910      DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2911    // Avoid folding to a constant if any OpaqueConstant is involved.
2912    const SDNode *N = Op.getNode();
2913    for (SDNode *Op :
2914         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2915      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2916        if (C->isOpaque())
2917          return false;
2918    }
2919    if (VT.isInteger())
2920      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2921    if (VT.isFloatingPoint())
2922      return TLO.CombineTo(
2923          Op,
2924          TLO.DAG.getConstantFP(
2925              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2926  }
2927
2928  // A multi use 'all demanded elts' simplify failed to find any knownbits.
2929  // Try again just for the original demanded elts.
2930  // Ensure we do this AFTER constant folding above.
2931  if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2932    Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2933
2934  return false;
2935}
2936
2937bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2938                                                const APInt &DemandedElts,
2939                                                DAGCombinerInfo &DCI) const {
2940  SelectionDAG &DAG = DCI.DAG;
2941  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2942                        !DCI.isBeforeLegalizeOps());
2943
2944  APInt KnownUndef, KnownZero;
2945  bool Simplified =
2946      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2947  if (Simplified) {
2948    DCI.AddToWorklist(Op.getNode());
2949    DCI.CommitTargetLoweringOpt(TLO);
2950  }
2951
2952  return Simplified;
2953}
2954
2955/// Given a vector binary operation and known undefined elements for each input
2956/// operand, compute whether each element of the output is undefined.
2957static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2958                                         const APInt &UndefOp0,
2959                                         const APInt &UndefOp1) {
2960  EVT VT = BO.getValueType();
2961  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2962         "Vector binop only");
2963
2964  EVT EltVT = VT.getVectorElementType();
2965  unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2966  assert(UndefOp0.getBitWidth() == NumElts &&
2967         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2968
2969  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2970                                   const APInt &UndefVals) {
2971    if (UndefVals[Index])
2972      return DAG.getUNDEF(EltVT);
2973
2974    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2975      // Try hard to make sure that the getNode() call is not creating temporary
2976      // nodes. Ignore opaque integers because they do not constant fold.
2977      SDValue Elt = BV->getOperand(Index);
2978      auto *C = dyn_cast<ConstantSDNode>(Elt);
2979      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2980        return Elt;
2981    }
2982
2983    return SDValue();
2984  };
2985
2986  APInt KnownUndef = APInt::getZero(NumElts);
2987  for (unsigned i = 0; i != NumElts; ++i) {
2988    // If both inputs for this element are either constant or undef and match
2989    // the element type, compute the constant/undef result for this element of
2990    // the vector.
2991    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2992    // not handle FP constants. The code within getNode() should be refactored
2993    // to avoid the danger of creating a bogus temporary node here.
2994    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2995    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2996    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2997      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2998        KnownUndef.setBit(i);
2999  }
3000  return KnownUndef;
3001}
3002
/// Attempt to simplify \p Op given that only the vector elements in
/// \p OriginalDemandedElts are required by users of the node. On return,
/// \p KnownUndef and \p KnownZero have one bit per element and record which
/// demanded result elements are known to be undef or zero, respectively.
/// Returns true if a simplification was committed to \p TLO (in which case
/// the known masks may be incomplete); otherwise the masks are valid for the
/// demanded elements. \p Depth limits recursion; \p AssumeSingleUse treats a
/// multi-use node as if it had a single use.
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getZero(NumElts);

  // Give the target a chance to bail out of the simplification entirely.
  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
    return false;

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp =
          TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
                          NewOp1 ? NewOp1 : Op1, Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    // Only element 0 of the result is defined; everything above it is undef.
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    SDValue ScalarSrc = Op.getOperand(0);
    if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      SDValue Src = ScalarSrc.getOperand(0);
      SDValue Idx = ScalarSrc.getOperand(1);
      EVT SrcVT = Src.getValueType();

      ElementCount SrcEltCnt = SrcVT.getVectorElementCount();

      if (SrcEltCnt.isScalable())
        return false;

      // If the scalar comes from element 0 of another vector, recurse into
      // that vector demanding just its element 0.
      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
      if (isNullConstant(Idx)) {
        APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
        APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
        APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcDemandedElts, SrcZero, SrcUndef;

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (IsLE) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;

        // The bitcast has split each wide element into a number of
        // narrow subelements. We have just computed the Known bits
        // for wide elements. See if element splitting results in
        // some subelements being zero. Only for demanded elements!
        for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
          if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
                   .isAllOnes())
            continue;
          for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
            unsigned Elt = Scale * SrcElt + SubElt;
            if (DemandedElts[Elt])
              KnownZero.setBit(Elt);
          }
        }
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnes()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    // Record which operands are already undef or (matching-width) zero.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    // Recurse into each subvector with its slice of the demanded mask.
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      bool FoundNewSub = false;
      SmallVector<SDValue, 2> DemandedSubOps;
      for (unsigned i = 0; i != NumSubVecs; ++i) {
        SDValue SubOp = Op.getOperand(i);
        APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
        SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
            SubOp, SubElts, TLO.DAG, Depth + 1);
        DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
        FoundNewSub = NewSubOp ? true : FoundNewSub;
      }
      if (FoundNewSub) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      KnownUndef.setBitVal(Idx, Scl.isUndef());

      KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    SDValue Sel = Op.getOperand(0);
    SDValue LHS = Op.getOperand(1);
    SDValue RHS = Op.getOperand(2);

    // Try to transform the select condition based on the current demanded
    // elements.
    APInt UndefSel, ZeroSel;
    if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
                                   Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;

    // A result element is undef/zero only if it is so in both choices.
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;

    // If we know that the selected element is always zero, we don't need the
    // select value element.
    APInt DemandedSel = DemandedElts & ~KnownZero;
    if (DemandedSel != DemandedElts)
      if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
                                     Depth + 1))
        return true;

    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask);
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();

      // zext - if we just need the bottom element then we can mask:
      // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
      if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
          Op->isOnlyUserOf(Src.getNode()) &&
          Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
        SDLoc DL(Op);
        EVT SrcVT = Src.getValueType();
        EVT SrcSVT = SrcVT.getScalarType();
        SmallVector<SDValue> MaskElts;
        MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
        MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
        SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
        if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
                ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
          Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
        }
      }
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::ADD: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    // add (x, x) - both operands are the same node, so simplify it once with
    // the single-use assumption before falling into the generic binop path.
    if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
      APInt UndefLHS, ZeroLHS;
      if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                     Depth + 1, /*AssumeSingleUse*/ true))
        return true;
    }
    [[fallthrough]];
  }
  case ISD::OR:
  case ISD::XOR:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    // Shifting a zero element always yields zero, regardless of the amount.
    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::MULHU:
  case ISD::MULHS:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    // If we know that a demanded element was zero in Op1 we don't need to
    // demand it in Op0 - its guaranteed to be zero.
    APInt DemandedElts0 = DemandedElts & ~SrcZero;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    KnownUndef &= DemandedElts0;
    KnownZero &= DemandedElts0;

    // If every element pair has a zero/undef then just fold to zero.
    // fold (and x, undef) -> 0  /  (and x, 0) -> 0
    // fold (mul x, undef) -> 0  /  (mul x, 0) -> 0
    if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      // Give targets a chance to handle their own nodes.
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      // Fall back to the demanded-bits path, demanding all bits of the
      // originally-demanded elements.
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}
3666
3667/// Determine which of the bits specified in Mask are known to be either zero or
3668/// one and return them in the Known.
3669void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3670                                                   KnownBits &Known,
3671                                                   const APInt &DemandedElts,
3672                                                   const SelectionDAG &DAG,
3673                                                   unsigned Depth) const {
3674  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3675          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3676          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3677          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3678         "Should use MaskedValueIsZero if you don't know whether Op"
3679         " is a target node!");
3680  Known.resetAll();
3681}
3682
// GlobalISel counterpart of computeKnownBitsForTargetNode. The conservative
// default reports no known bits; targets override this hook to expose bit
// knowledge for their own generic instructions.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}
3689
3690void TargetLowering::computeKnownBitsForFrameIndex(
3691  const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3692  // The low bits are known zero if the pointer is aligned.
3693  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3694}
3695
// GlobalISel hook: conservatively report no alignment knowledge (Align(1))
// for target instructions; targets override this to expose better alignment.
Align TargetLowering::computeKnownAlignForTargetInstr(
  GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  unsigned Depth) const {
  return Align(1);
}
3701
3702/// This method can be implemented by targets that want to expose additional
3703/// information about sign bits to the DAG Combiner.
3704unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3705                                                         const APInt &,
3706                                                         const SelectionDAG &,
3707                                                         unsigned Depth) const {
3708  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3709          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3710          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3711          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3712         "Should use ComputeNumSignBits if you don't know whether Op"
3713         " is a target node!");
3714  return 1;
3715}
3716
// GlobalISel counterpart of ComputeNumSignBitsForTargetNode: the conservative
// answer is 1 (only the sign bit itself is known to replicate the sign).
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}
3722
3723bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3724    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3725    TargetLoweringOpt &TLO, unsigned Depth) const {
3726  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3727          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3728          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3729          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3730         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3731         " is a target node!");
3732  return false;
3733}
3734
3735bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3736    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3737    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3738  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3739          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3740          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3741          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3742         "Should use SimplifyDemandedBits if you don't know whether Op"
3743         " is a target node!");
3744  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3745  return false;
3746}
3747
3748SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3749    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3750    SelectionDAG &DAG, unsigned Depth) const {
3751  assert(
3752      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3753       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3754       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3755       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3756      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3757      " is a target node!");
3758  return SDValue();
3759}
3760
3761SDValue
3762TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3763                                        SDValue N1, MutableArrayRef<int> Mask,
3764                                        SelectionDAG &DAG) const {
3765  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3766  if (!LegalMask) {
3767    std::swap(N0, N1);
3768    ShuffleVectorSDNode::commuteMask(Mask);
3769    LegalMask = isShuffleMaskLegal(Mask, VT);
3770  }
3771
3772  if (!LegalMask)
3773    return SDValue();
3774
3775  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3776}
3777
// Targets that materialize constants via target-specific load nodes can
// override this to let DAG analyses see the loaded constant. The default
// reports no constant.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
3781
3782bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3783    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3784    bool PoisonOnly, unsigned Depth) const {
3785  assert(
3786      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3787       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3788       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3789       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3790      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3791      " is a target node!");
3792  return false;
3793}
3794
3795bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3796    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3797    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3798  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3799          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3800          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3801          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3802         "Should use canCreateUndefOrPoison if you don't know whether Op"
3803         " is a target node!");
3804  // Be conservative and return true.
3805  return true;
3806}
3807
3808bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3809                                                  const SelectionDAG &DAG,
3810                                                  bool SNaN,
3811                                                  unsigned Depth) const {
3812  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3813          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3814          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3815          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3816         "Should use isKnownNeverNaN if you don't know whether Op"
3817         " is a target node!");
3818  return false;
3819}
3820
3821bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3822                                               const APInt &DemandedElts,
3823                                               APInt &UndefElts,
3824                                               const SelectionDAG &DAG,
3825                                               unsigned Depth) const {
3826  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3827          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3828          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3829          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3830         "Should use isSplatValue if you don't know whether Op"
3831         " is a target node!");
3832  return false;
3833}
3834
3835// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3836// work with truncating build vectors and vectors with elements of less than
3837// 8 bits.
3838bool TargetLowering::isConstTrueVal(SDValue N) const {
3839  if (!N)
3840    return false;
3841
3842  unsigned EltWidth;
3843  APInt CVal;
3844  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3845                                               /*AllowTruncation=*/true)) {
3846    CVal = CN->getAPIntValue();
3847    EltWidth = N.getValueType().getScalarSizeInBits();
3848  } else
3849    return false;
3850
3851  // If this is a truncating splat, truncate the splat value.
3852  // Otherwise, we may fail to match the expected values below.
3853  if (EltWidth < CVal.getBitWidth())
3854    CVal = CVal.trunc(EltWidth);
3855
3856  switch (getBooleanContents(N.getValueType())) {
3857  case UndefinedBooleanContent:
3858    return CVal[0];
3859  case ZeroOrOneBooleanContent:
3860    return CVal.isOne();
3861  case ZeroOrNegativeOneBooleanContent:
3862    return CVal.isAllOnes();
3863  }
3864
3865  llvm_unreachable("Invalid boolean contents");
3866}
3867
3868bool TargetLowering::isConstFalseVal(SDValue N) const {
3869  if (!N)
3870    return false;
3871
3872  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3873  if (!CN) {
3874    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3875    if (!BV)
3876      return false;
3877
3878    // Only interested in constant splats, we don't care about undef
3879    // elements in identifying boolean constants and getConstantSplatNode
3880    // returns NULL if all ops are undef;
3881    CN = BV->getConstantSplatNode();
3882    if (!CN)
3883      return false;
3884  }
3885
3886  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3887    return !CN->getAPIntValue()[0];
3888
3889  return CN->isZero();
3890}
3891
3892bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3893                                       bool SExt) const {
3894  if (VT == MVT::i1)
3895    return N->isOne();
3896
3897  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3898  switch (Cnt) {
3899  case TargetLowering::ZeroOrOneBooleanContent:
3900    // An extended value of 1 is always true, unless its original type is i1,
3901    // in which case it will be sign extended to -1.
3902    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3903  case TargetLowering::UndefinedBooleanContent:
3904  case TargetLowering::ZeroOrNegativeOneBooleanContent:
3905    return N->isAllOnes() && SExt;
3906  }
3907  llvm_unreachable("Unexpected enumeration.");
3908}
3909
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer [in]equality comparisons are handled; everything else returns
/// a null SDValue.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize the 'and' (if any) onto the LHS of the comparison.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    // Narrow to a type just wide enough that the mask bit becomes the sign
    // bit of the truncated value.
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it's liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
4004
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned-range check onto an eq/ne comparison, adjusting the
  // setcc constant so both accepted forms canonicalize to the same values.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The added constant must be exactly half of the compared-against constant,
  // i.e. (1 << (KeptBits-1)) vs (1 << KeptBits), or this isn't the pattern.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
4102
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
//
// Hoists a shifted-constant mask out of an 'and' that feeds an [in]equality
// comparison against zero, by applying the opposite shift to the other 'and'
// operand instead. Whether this is profitable is delegated to the target via
// shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd.
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  // On success, fills in C, Y, and NewShiftOpcode (the inverse shift to apply
  // to X); also consults the target hook, which sees X via capture.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
4172
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  // The remaining folds need the repeated operand to be the second binop
  // operand.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  // Make sure the new shift gets revisited by the combiner.
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
4215
// Try to simplify (setcc (ctpop x), C1, Cond) using bit tricks that avoid a
// full population count. Returns a null SDValue when no fold applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
      return SDValue();

    // Each extra clear-lowest-set-bit pass costs an ADD+AND; respect the
    // target's limit on how many are worth emitting.
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Repeatedly clear the lowest set bit: x &= (x - 1).
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) >  x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
  }

  return SDValue();
}
4289
4290static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4291                                   ISD::CondCode Cond, const SDLoc &dl,
4292                                   SelectionDAG &DAG) {
4293  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4294    return SDValue();
4295
4296  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4297  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4298    return SDValue();
4299
4300  auto getRotateSource = [](SDValue X) {
4301    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4302      return X.getOperand(0);
4303    return SDValue();
4304  };
4305
4306  // Peek through a rotated value compared against 0 or -1:
4307  // (rot X, Y) == 0/-1 --> X == 0/-1
4308  // (rot X, Y) != 0/-1 --> X != 0/-1
4309  if (SDValue R = getRotateSource(N0))
4310    return DAG.getSetCC(dl, VT, R, N1, Cond);
4311
4312  // Peek through an 'or' of a rotated value compared against 0:
4313  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4314  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4315  //
4316  // TODO: Add the 'and' with -1 sibling.
4317  // TODO: Recurse through a series of 'or' ops to find the rotate.
4318  EVT OpVT = N0.getValueType();
4319  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4320    if (SDValue R = getRotateSource(N0.getOperand(0))) {
4321      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4322      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4323    }
4324    if (SDValue R = getRotateSource(N0.getOperand(1))) {
4325      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4326      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4327    }
4328  }
4329
4330  return SDValue();
4331}
4332
// Try to simplify an all-bits-clear test of a funnel shift whose two inputs
// overlap through a one-use 'or'. Returns a null SDValue when no fold applies.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // The shift amount must be a constant strictly inside the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the shared operand and Y is the other 'or' operand.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}
4398
4399/// Try to simplify a setcc built with the specified operands and cc. If it is
4400/// unable to simplify it, return a null SDValue.
4401SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4402                                      ISD::CondCode Cond, bool foldBooleans,
4403                                      DAGCombinerInfo &DCI,
4404                                      const SDLoc &dl) const {
4405  SelectionDAG &DAG = DCI.DAG;
4406  const DataLayout &Layout = DAG.getDataLayout();
4407  EVT OpVT = N0.getValueType();
4408  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4409
4410  // Constant fold or commute setcc.
4411  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4412    return Fold;
4413
4414  bool N0ConstOrSplat =
4415      isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4416  bool N1ConstOrSplat =
4417      isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4418
4419  // Canonicalize toward having the constant on the RHS.
4420  // TODO: Handle non-splat vector constants. All undef causes trouble.
4421  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4422  // infinite loop here when we encounter one.
4423  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4424  if (N0ConstOrSplat && !N1ConstOrSplat &&
4425      (DCI.isBeforeLegalizeOps() ||
4426       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4427    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4428
4429  // If we have a subtract with the same 2 non-constant operands as this setcc
4430  // -- but in reverse order -- then try to commute the operands of this setcc
4431  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4432  // instruction on some targets.
4433  if (!N0ConstOrSplat && !N1ConstOrSplat &&
4434      (DCI.isBeforeLegalizeOps() ||
4435       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4436      DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4437      !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4438    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4439
4440  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4441    return V;
4442
4443  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4444    return V;
4445
4446  if (auto *N1C = isConstOrConstSplat(N1)) {
4447    const APInt &C1 = N1C->getAPIntValue();
4448
4449    // Optimize some CTPOP cases.
4450    if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4451      return V;
4452
4453    // For equality to 0 of a no-wrap multiply, decompose and test each op:
4454    // X * Y == 0 --> (X == 0) || (Y == 0)
4455    // X * Y != 0 --> (X != 0) && (Y != 0)
4456    // TODO: This bails out if minsize is set, but if the target doesn't have a
4457    //       single instruction multiply for this type, it would likely be
4458    //       smaller to decompose.
4459    if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4460        N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4461        (N0->getFlags().hasNoUnsignedWrap() ||
4462         N0->getFlags().hasNoSignedWrap()) &&
4463        !Attr.hasFnAttr(Attribute::MinSize)) {
4464      SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4465      SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4466      unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4467      return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4468    }
4469
4470    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4471    // equality comparison, then we're just comparing whether X itself is
4472    // zero.
4473    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4474        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4475        llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4476      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4477        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4478            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4479          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4480            // (srl (ctlz x), 5) == 0  -> X != 0
4481            // (srl (ctlz x), 5) != 1  -> X != 0
4482            Cond = ISD::SETNE;
4483          } else {
4484            // (srl (ctlz x), 5) != 0  -> X == 0
4485            // (srl (ctlz x), 5) == 1  -> X == 0
4486            Cond = ISD::SETEQ;
4487          }
4488          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4489          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4490                              Cond);
4491        }
4492      }
4493    }
4494  }
4495
4496  // FIXME: Support vectors.
4497  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4498    const APInt &C1 = N1C->getAPIntValue();
4499
4500    // (zext x) == C --> x == (trunc C)
4501    // (sext x) == C --> x == (trunc C)
4502    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4503        DCI.isBeforeLegalize() && N0->hasOneUse()) {
4504      unsigned MinBits = N0.getValueSizeInBits();
4505      SDValue PreExt;
4506      bool Signed = false;
4507      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4508        // ZExt
4509        MinBits = N0->getOperand(0).getValueSizeInBits();
4510        PreExt = N0->getOperand(0);
4511      } else if (N0->getOpcode() == ISD::AND) {
4512        // DAGCombine turns costly ZExts into ANDs
4513        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4514          if ((C->getAPIntValue()+1).isPowerOf2()) {
4515            MinBits = C->getAPIntValue().countr_one();
4516            PreExt = N0->getOperand(0);
4517          }
4518      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4519        // SExt
4520        MinBits = N0->getOperand(0).getValueSizeInBits();
4521        PreExt = N0->getOperand(0);
4522        Signed = true;
4523      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4524        // ZEXTLOAD / SEXTLOAD
4525        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4526          MinBits = LN0->getMemoryVT().getSizeInBits();
4527          PreExt = N0;
4528        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4529          Signed = true;
4530          MinBits = LN0->getMemoryVT().getSizeInBits();
4531          PreExt = N0;
4532        }
4533      }
4534
4535      // Figure out how many bits we need to preserve this constant.
4536      unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4537
4538      // Make sure we're not losing bits from the constant.
4539      if (MinBits > 0 &&
4540          MinBits < C1.getBitWidth() &&
4541          MinBits >= ReqdBits) {
4542        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4543        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4544          // Will get folded away.
4545          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4546          if (MinBits == 1 && C1 == 1)
4547            // Invert the condition.
4548            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4549                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4550          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4551          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4552        }
4553
4554        // If truncating the setcc operands is not desirable, we can still
4555        // simplify the expression in some cases:
4556        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4557        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4558        // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4559        // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4560        // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4561        // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4562        SDValue TopSetCC = N0->getOperand(0);
4563        unsigned N0Opc = N0->getOpcode();
4564        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4565        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4566            TopSetCC.getOpcode() == ISD::SETCC &&
4567            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4568            (isConstFalseVal(N1) ||
4569             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4570
4571          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4572                         (!N1C->isZero() && Cond == ISD::SETNE);
4573
4574          if (!Inverse)
4575            return TopSetCC;
4576
4577          ISD::CondCode InvCond = ISD::getSetCCInverse(
4578              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4579              TopSetCC.getOperand(0).getValueType());
4580          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4581                                      TopSetCC.getOperand(1),
4582                                      InvCond);
4583        }
4584      }
4585    }
4586
4587    // If the LHS is '(and load, const)', the RHS is 0, the test is for
4588    // equality or unsigned, and all 1 bits of the const are in the same
4589    // partial word, see if we can shorten the load.
4590    if (DCI.isBeforeLegalize() &&
4591        !ISD::isSignedIntSetCC(Cond) &&
4592        N0.getOpcode() == ISD::AND && C1 == 0 &&
4593        N0.getNode()->hasOneUse() &&
4594        isa<LoadSDNode>(N0.getOperand(0)) &&
4595        N0.getOperand(0).getNode()->hasOneUse() &&
4596        isa<ConstantSDNode>(N0.getOperand(1))) {
4597      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4598      APInt bestMask;
4599      unsigned bestWidth = 0, bestOffset = 0;
4600      if (Lod->isSimple() && Lod->isUnindexed()) {
4601        unsigned origWidth = N0.getValueSizeInBits();
4602        unsigned maskWidth = origWidth;
4603        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4604        // 8 bits, but have to be careful...
4605        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4606          origWidth = Lod->getMemoryVT().getSizeInBits();
4607        const APInt &Mask = N0.getConstantOperandAPInt(1);
4608        for (unsigned width = origWidth / 2; width>=8; width /= 2) {
4609          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4610          for (unsigned offset=0; offset<origWidth/width; offset++) {
4611            if (Mask.isSubsetOf(newMask)) {
4612              if (Layout.isLittleEndian())
4613                bestOffset = (uint64_t)offset * (width/8);
4614              else
4615                bestOffset = (origWidth/width - offset - 1) * (width/8);
4616              bestMask = Mask.lshr(offset * (width/8) * 8);
4617              bestWidth = width;
4618              break;
4619            }
4620            newMask <<= width;
4621          }
4622        }
4623      }
4624      if (bestWidth) {
4625        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4626        if (newVT.isRound() &&
4627            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4628          SDValue Ptr = Lod->getBasePtr();
4629          if (bestOffset != 0)
4630            Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
4631                                           dl);
4632          SDValue NewLoad =
4633              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4634                          Lod->getPointerInfo().getWithOffset(bestOffset),
4635                          Lod->getOriginalAlign());
4636          return DAG.getSetCC(dl, VT,
4637                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4638                                      DAG.getConstant(bestMask.trunc(bestWidth),
4639                                                      dl, newVT)),
4640                              DAG.getConstant(0LL, dl, newVT), Cond);
4641        }
4642      }
4643    }
4644
4645    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4646    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4647      unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4648
4649      // If the comparison constant has bits in the upper part, the
4650      // zero-extended value could never match.
4651      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4652                                              C1.getBitWidth() - InSize))) {
4653        switch (Cond) {
4654        case ISD::SETUGT:
4655        case ISD::SETUGE:
4656        case ISD::SETEQ:
4657          return DAG.getConstant(0, dl, VT);
4658        case ISD::SETULT:
4659        case ISD::SETULE:
4660        case ISD::SETNE:
4661          return DAG.getConstant(1, dl, VT);
4662        case ISD::SETGT:
4663        case ISD::SETGE:
4664          // True if the sign bit of C1 is set.
4665          return DAG.getConstant(C1.isNegative(), dl, VT);
4666        case ISD::SETLT:
4667        case ISD::SETLE:
4668          // True if the sign bit of C1 isn't set.
4669          return DAG.getConstant(C1.isNonNegative(), dl, VT);
4670        default:
4671          break;
4672        }
4673      }
4674
4675      // Otherwise, we can perform the comparison with the low bits.
4676      switch (Cond) {
4677      case ISD::SETEQ:
4678      case ISD::SETNE:
4679      case ISD::SETUGT:
4680      case ISD::SETUGE:
4681      case ISD::SETULT:
4682      case ISD::SETULE: {
4683        EVT newVT = N0.getOperand(0).getValueType();
4684        if (DCI.isBeforeLegalizeOps() ||
4685            (isOperationLegal(ISD::SETCC, newVT) &&
4686             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4687          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4688          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4689
4690          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4691                                          NewConst, Cond);
4692          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4693        }
4694        break;
4695      }
4696      default:
4697        break; // todo, be more careful with signed comparisons
4698      }
4699    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4700               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4701               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4702                                      OpVT)) {
4703      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4704      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4705      EVT ExtDstTy = N0.getValueType();
4706      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4707
4708      // If the constant doesn't fit into the number of bits for the source of
4709      // the sign extension, it is impossible for both sides to be equal.
4710      if (C1.getSignificantBits() > ExtSrcTyBits)
4711        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4712
4713      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4714             ExtDstTy != ExtSrcTy && "Unexpected types!");
4715      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4716      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4717                                   DAG.getConstant(Imm, dl, ExtDstTy));
4718      if (!DCI.isCalledByLegalizer())
4719        DCI.AddToWorklist(ZextOp.getNode());
4720      // Otherwise, make this a use of a zext.
4721      return DAG.getSetCC(dl, VT, ZextOp,
4722                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4723    } else if ((N1C->isZero() || N1C->isOne()) &&
4724               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4725      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
4726      if (N0.getOpcode() == ISD::SETCC &&
4727          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4728          (N0.getValueType() == MVT::i1 ||
4729           getBooleanContents(N0.getOperand(0).getValueType()) ==
4730                       ZeroOrOneBooleanContent)) {
4731        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4732        if (TrueWhenTrue)
4733          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4734        // Invert the condition.
4735        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4736        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4737        if (DCI.isBeforeLegalizeOps() ||
4738            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4739          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4740      }
4741
4742      if ((N0.getOpcode() == ISD::XOR ||
4743           (N0.getOpcode() == ISD::AND &&
4744            N0.getOperand(0).getOpcode() == ISD::XOR &&
4745            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4746          isOneConstant(N0.getOperand(1))) {
4747        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
4748        // can only do this if the top bits are known zero.
4749        unsigned BitWidth = N0.getValueSizeInBits();
4750        if (DAG.MaskedValueIsZero(N0,
4751                                  APInt::getHighBitsSet(BitWidth,
4752                                                        BitWidth-1))) {
4753          // Okay, get the un-inverted input value.
4754          SDValue Val;
4755          if (N0.getOpcode() == ISD::XOR) {
4756            Val = N0.getOperand(0);
4757          } else {
4758            assert(N0.getOpcode() == ISD::AND &&
4759                    N0.getOperand(0).getOpcode() == ISD::XOR);
4760            // ((X^1)&1)^1 -> X & 1
4761            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4762                              N0.getOperand(0).getOperand(0),
4763                              N0.getOperand(1));
4764          }
4765
4766          return DAG.getSetCC(dl, VT, Val, N1,
4767                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4768        }
4769      } else if (N1C->isOne()) {
4770        SDValue Op0 = N0;
4771        if (Op0.getOpcode() == ISD::TRUNCATE)
4772          Op0 = Op0.getOperand(0);
4773
4774        if ((Op0.getOpcode() == ISD::XOR) &&
4775            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4776            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4777          SDValue XorLHS = Op0.getOperand(0);
4778          SDValue XorRHS = Op0.getOperand(1);
4779          // Ensure that the input setccs return an i1 type or 0/1 value.
4780          if (Op0.getValueType() == MVT::i1 ||
4781              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4782                      ZeroOrOneBooleanContent &&
4783               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4784                        ZeroOrOneBooleanContent)) {
4785            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4786            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4787            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4788          }
4789        }
4790        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4791          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4792          if (Op0.getValueType().bitsGT(VT))
4793            Op0 = DAG.getNode(ISD::AND, dl, VT,
4794                          DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4795                          DAG.getConstant(1, dl, VT));
4796          else if (Op0.getValueType().bitsLT(VT))
4797            Op0 = DAG.getNode(ISD::AND, dl, VT,
4798                        DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4799                        DAG.getConstant(1, dl, VT));
4800
4801          return DAG.getSetCC(dl, VT, Op0,
4802                              DAG.getConstant(0, dl, Op0.getValueType()),
4803                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4804        }
4805        if (Op0.getOpcode() == ISD::AssertZext &&
4806            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4807          return DAG.getSetCC(dl, VT, Op0,
4808                              DAG.getConstant(0, dl, Op0.getValueType()),
4809                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4810      }
4811    }
4812
4813    // Given:
4814    //   icmp eq/ne (urem %x, %y), 0
4815    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4816    //   icmp eq/ne %x, 0
4817    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4818        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4819      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4820      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4821      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4822        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4823    }
4824
4825    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4826    //  and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4827    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4828        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4829        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4830        N1C && N1C->isAllOnes()) {
4831      return DAG.getSetCC(dl, VT, N0.getOperand(0),
4832                          DAG.getConstant(0, dl, OpVT),
4833                          Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4834    }
4835
4836    if (SDValue V =
4837            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4838      return V;
4839  }
4840
4841  // These simplifications apply to splat vectors as well.
4842  // TODO: Handle more splat vector cases.
4843  if (auto *N1C = isConstOrConstSplat(N1)) {
4844    const APInt &C1 = N1C->getAPIntValue();
4845
4846    APInt MinVal, MaxVal;
4847    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4848    if (ISD::isSignedIntSetCC(Cond)) {
4849      MinVal = APInt::getSignedMinValue(OperandBitSize);
4850      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4851    } else {
4852      MinVal = APInt::getMinValue(OperandBitSize);
4853      MaxVal = APInt::getMaxValue(OperandBitSize);
4854    }
4855
4856    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4857    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4858      // X >= MIN --> true
4859      if (C1 == MinVal)
4860        return DAG.getBoolConstant(true, dl, VT, OpVT);
4861
4862      if (!VT.isVector()) { // TODO: Support this for vectors.
4863        // X >= C0 --> X > (C0 - 1)
4864        APInt C = C1 - 1;
4865        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4866        if ((DCI.isBeforeLegalizeOps() ||
4867             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4868            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4869                                  isLegalICmpImmediate(C.getSExtValue())))) {
4870          return DAG.getSetCC(dl, VT, N0,
4871                              DAG.getConstant(C, dl, N1.getValueType()),
4872                              NewCC);
4873        }
4874      }
4875    }
4876
4877    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4878      // X <= MAX --> true
4879      if (C1 == MaxVal)
4880        return DAG.getBoolConstant(true, dl, VT, OpVT);
4881
4882      // X <= C0 --> X < (C0 + 1)
4883      if (!VT.isVector()) { // TODO: Support this for vectors.
4884        APInt C = C1 + 1;
4885        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4886        if ((DCI.isBeforeLegalizeOps() ||
4887             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4888            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4889                                  isLegalICmpImmediate(C.getSExtValue())))) {
4890          return DAG.getSetCC(dl, VT, N0,
4891                              DAG.getConstant(C, dl, N1.getValueType()),
4892                              NewCC);
4893        }
4894      }
4895    }
4896
4897    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4898      if (C1 == MinVal)
4899        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4900
4901      // TODO: Support this for vectors after legalize ops.
4902      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4903        // Canonicalize setlt X, Max --> setne X, Max
4904        if (C1 == MaxVal)
4905          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4906
4907        // If we have setult X, 1, turn it into seteq X, 0
4908        if (C1 == MinVal+1)
4909          return DAG.getSetCC(dl, VT, N0,
4910                              DAG.getConstant(MinVal, dl, N0.getValueType()),
4911                              ISD::SETEQ);
4912      }
4913    }
4914
4915    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4916      if (C1 == MaxVal)
4917        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4918
4919      // TODO: Support this for vectors after legalize ops.
4920      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4921        // Canonicalize setgt X, Min --> setne X, Min
4922        if (C1 == MinVal)
4923          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4924
4925        // If we have setugt X, Max-1, turn it into seteq X, Max
4926        if (C1 == MaxVal-1)
4927          return DAG.getSetCC(dl, VT, N0,
4928                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
4929                              ISD::SETEQ);
4930      }
4931    }
4932
4933    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4934      // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
4935      if (C1.isZero())
4936        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4937                VT, N0, N1, Cond, DCI, dl))
4938          return CC;
4939
4940      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4941      // For example, when high 32-bits of i64 X are known clear:
4942      // all bits clear: (X | (Y<<32)) ==  0 --> (X | Y) ==  0
4943      // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
4944      bool CmpZero = N1C->isZero();
4945      bool CmpNegOne = N1C->isAllOnes();
4946      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4947        // Match or(lo,shl(hi,bw/2)) pattern.
4948        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4949          unsigned EltBits = V.getScalarValueSizeInBits();
4950          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4951            return false;
4952          SDValue LHS = V.getOperand(0);
4953          SDValue RHS = V.getOperand(1);
4954          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
4955          // Unshifted element must have zero upperbits.
4956          if (RHS.getOpcode() == ISD::SHL &&
4957              isa<ConstantSDNode>(RHS.getOperand(1)) &&
4958              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4959              DAG.MaskedValueIsZero(LHS, HiBits)) {
4960            Lo = LHS;
4961            Hi = RHS.getOperand(0);
4962            return true;
4963          }
4964          if (LHS.getOpcode() == ISD::SHL &&
4965              isa<ConstantSDNode>(LHS.getOperand(1)) &&
4966              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4967              DAG.MaskedValueIsZero(RHS, HiBits)) {
4968            Lo = RHS;
4969            Hi = LHS.getOperand(0);
4970            return true;
4971          }
4972          return false;
4973        };
4974
4975        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4976          unsigned EltBits = N0.getScalarValueSizeInBits();
4977          unsigned HalfBits = EltBits / 2;
4978          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4979          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4980          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4981          SDValue NewN0 =
4982              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4983          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4984          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4985        };
4986
4987        SDValue Lo, Hi;
4988        if (IsConcat(N0, Lo, Hi))
4989          return MergeConcat(Lo, Hi);
4990
4991        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4992          SDValue Lo0, Lo1, Hi0, Hi1;
4993          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4994              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4995            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4996                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4997          }
4998        }
4999      }
5000    }
5001
5002    // If we have "setcc X, C0", check to see if we can shrink the immediate
5003    // by changing cc.
5004    // TODO: Support this for vectors after legalize ops.
5005    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5006      // SETUGT X, SINTMAX  -> SETLT X, 0
5007      // SETUGE X, SINTMIN -> SETLT X, 0
5008      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5009          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5010        return DAG.getSetCC(dl, VT, N0,
5011                            DAG.getConstant(0, dl, N1.getValueType()),
5012                            ISD::SETLT);
5013
5014      // SETULT X, SINTMIN  -> SETGT X, -1
5015      // SETULE X, SINTMAX  -> SETGT X, -1
5016      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5017          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5018        return DAG.getSetCC(dl, VT, N0,
5019                            DAG.getAllOnesConstant(dl, N1.getValueType()),
5020                            ISD::SETGT);
5021    }
5022  }
5023
5024  // Back to non-vector simplifications.
5025  // TODO: Can we do these for vector splats?
5026  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5027    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5028    const APInt &C1 = N1C->getAPIntValue();
5029    EVT ShValTy = N0.getValueType();
5030
5031    // Fold bit comparisons when we can. This will result in an
5032    // incorrect value when boolean false is negative one, unless
5033    // the bitsize is 1 in which case the false value is the same
5034    // in practice regardless of the representation.
5035    if ((VT.getSizeInBits() == 1 ||
5036         getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5037        (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5038        (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5039        N0.getOpcode() == ISD::AND) {
5040      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5041        EVT ShiftTy =
5042            getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
5043        if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
5044          // Perform the xform if the AND RHS is a single bit.
5045          unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5046          if (AndRHS->getAPIntValue().isPowerOf2() &&
5047              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5048            return DAG.getNode(ISD::TRUNCATE, dl, VT,
5049                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5050                                           DAG.getConstant(ShCt, dl, ShiftTy)));
5051          }
5052        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5053          // (X & 8) == 8  -->  (X & 8) >> 3
5054          // Perform the xform if C1 is a single bit.
5055          unsigned ShCt = C1.logBase2();
5056          if (C1.isPowerOf2() &&
5057              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5058            return DAG.getNode(ISD::TRUNCATE, dl, VT,
5059                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5060                                           DAG.getConstant(ShCt, dl, ShiftTy)));
5061          }
5062        }
5063      }
5064    }
5065
5066    if (C1.getSignificantBits() <= 64 &&
5067        !isLegalICmpImmediate(C1.getSExtValue())) {
5068      EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
5069      // (X & -256) == 256 -> (X >> 8) == 1
5070      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5071          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5072        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5073          const APInt &AndRHSC = AndRHS->getAPIntValue();
5074          if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5075            unsigned ShiftBits = AndRHSC.countr_zero();
5076            if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5077              SDValue Shift =
5078                DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
5079                            DAG.getConstant(ShiftBits, dl, ShiftTy));
5080              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5081              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5082            }
5083          }
5084        }
5085      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5086                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5087        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5088        // X <  0x100000000 -> (X >> 32) <  1
5089        // X >= 0x100000000 -> (X >> 32) >= 1
5090        // X <= 0x0ffffffff -> (X >> 32) <  1
5091        // X >  0x0ffffffff -> (X >> 32) >= 1
5092        unsigned ShiftBits;
5093        APInt NewC = C1;
5094        ISD::CondCode NewCond = Cond;
5095        if (AdjOne) {
5096          ShiftBits = C1.countr_one();
5097          NewC = NewC + 1;
5098          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5099        } else {
5100          ShiftBits = C1.countr_zero();
5101        }
5102        NewC.lshrInPlace(ShiftBits);
5103        if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5104            isLegalICmpImmediate(NewC.getSExtValue()) &&
5105            !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5106          SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5107                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
5108          SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5109          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5110        }
5111      }
5112    }
5113  }
5114
5115  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5116    auto *CFP = cast<ConstantFPSDNode>(N1);
5117    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5118
5119    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
5120    // constant if knowing that the operand is non-nan is enough.  We prefer to
5121    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5122    // materialize 0.0.
5123    if (Cond == ISD::SETO || Cond == ISD::SETUO)
5124      return DAG.getSetCC(dl, VT, N0, N0, Cond);
5125
5126    // setcc (fneg x), C -> setcc swap(pred) x, -C
5127    if (N0.getOpcode() == ISD::FNEG) {
5128      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5129      if (DCI.isBeforeLegalizeOps() ||
5130          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5131        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5132        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5133      }
5134    }
5135
5136    // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5137    if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5138        !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5139      bool IsFabs = N0.getOpcode() == ISD::FABS;
5140      SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5141      if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5142        FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5143                                             : (IsFabs ? fcInf : fcPosInf);
5144        if (Cond == ISD::SETUEQ)
5145          Flag |= fcNan;
5146        return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5147                           DAG.getTargetConstant(Flag, dl, MVT::i32));
5148      }
5149    }
5150
5151    // If the condition is not legal, see if we can find an equivalent one
5152    // which is legal.
5153    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5154      // If the comparison was an awkward floating-point == or != and one of
5155      // the comparison operands is infinity or negative infinity, convert the
5156      // condition to a less-awkward <= or >=.
5157      if (CFP->getValueAPF().isInfinity()) {
5158        bool IsNegInf = CFP->getValueAPF().isNegative();
5159        ISD::CondCode NewCond = ISD::SETCC_INVALID;
5160        switch (Cond) {
5161        case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5162        case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5163        case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5164        case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5165        default: break;
5166        }
5167        if (NewCond != ISD::SETCC_INVALID &&
5168            isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5169          return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5170      }
5171    }
5172  }
5173
5174  if (N0 == N1) {
5175    // The sext(setcc()) => setcc() optimization relies on the appropriate
5176    // constant being emitted.
5177    assert(!N0.getValueType().isInteger() &&
5178           "Integer types should be handled by FoldSetCC");
5179
5180    bool EqTrue = ISD::isTrueWhenEqual(Cond);
5181    unsigned UOF = ISD::getUnorderedFlavor(Cond);
5182    if (UOF == 2) // FP operators that are undefined on NaNs.
5183      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5184    if (UOF == unsigned(EqTrue))
5185      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5186    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
5187    // if it is not already.
5188    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5189    if (NewCond != Cond &&
5190        (DCI.isBeforeLegalizeOps() ||
5191                            isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5192      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5193  }
5194
5195  // ~X > ~Y --> Y > X
5196  // ~X < ~Y --> Y < X
5197  // ~X < C --> X > ~C
5198  // ~X > C --> X < ~C
5199  if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5200      N0.getValueType().isInteger()) {
5201    if (isBitwiseNot(N0)) {
5202      if (isBitwiseNot(N1))
5203        return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5204
5205      if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5206          !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5207        SDValue Not = DAG.getNOT(dl, N1, OpVT);
5208        return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5209      }
5210    }
5211  }
5212
5213  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5214      N0.getValueType().isInteger()) {
5215    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5216        N0.getOpcode() == ISD::XOR) {
5217      // Simplify (X+Y) == (X+Z) -->  Y == Z
5218      if (N0.getOpcode() == N1.getOpcode()) {
5219        if (N0.getOperand(0) == N1.getOperand(0))
5220          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5221        if (N0.getOperand(1) == N1.getOperand(1))
5222          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5223        if (isCommutativeBinOp(N0.getOpcode())) {
5224          // If X op Y == Y op X, try other combinations.
5225          if (N0.getOperand(0) == N1.getOperand(1))
5226            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5227                                Cond);
5228          if (N0.getOperand(1) == N1.getOperand(0))
5229            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5230                                Cond);
5231        }
5232      }
5233
5234      // If RHS is a legal immediate value for a compare instruction, we need
5235      // to be careful about increasing register pressure needlessly.
5236      bool LegalRHSImm = false;
5237
5238      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5239        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5240          // Turn (X+C1) == C2 --> X == C2-C1
5241          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5242            return DAG.getSetCC(
5243                dl, VT, N0.getOperand(0),
5244                DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5245                                dl, N0.getValueType()),
5246                Cond);
5247
5248          // Turn (X^C1) == C2 --> X == C1^C2
5249          if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5250            return DAG.getSetCC(
5251                dl, VT, N0.getOperand(0),
5252                DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5253                                dl, N0.getValueType()),
5254                Cond);
5255        }
5256
5257        // Turn (C1-X) == C2 --> X == C1-C2
5258        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5259          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5260            return DAG.getSetCC(
5261                dl, VT, N0.getOperand(1),
5262                DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5263                                dl, N0.getValueType()),
5264                Cond);
5265
5266        // Could RHSC fold directly into a compare?
5267        if (RHSC->getValueType(0).getSizeInBits() <= 64)
5268          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5269      }
5270
5271      // (X+Y) == X --> Y == 0 and similar folds.
5272      // Don't do this if X is an immediate that can fold into a cmp
5273      // instruction and X+Y has other uses. It could be an induction variable
5274      // chain, and the transform would increase register pressure.
5275      if (!LegalRHSImm || N0.hasOneUse())
5276        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5277          return V;
5278    }
5279
5280    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5281        N1.getOpcode() == ISD::XOR)
5282      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5283        return V;
5284
5285    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5286      return V;
5287  }
5288
5289  // Fold remainder of division by a constant.
5290  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5291      N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5292    // When division is cheap or optimizing for minimum size,
5293    // fall through to DIVREM creation by skipping this fold.
5294    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5295      if (N0.getOpcode() == ISD::UREM) {
5296        if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5297          return Folded;
5298      } else if (N0.getOpcode() == ISD::SREM) {
5299        if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5300          return Folded;
5301      }
5302    }
5303  }
5304
5305  // Fold away ALL boolean setcc's.
5306  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5307    SDValue Temp;
5308    switch (Cond) {
5309    default: llvm_unreachable("Unknown integer setcc!");
5310    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
5311      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5312      N0 = DAG.getNOT(dl, Temp, OpVT);
5313      if (!DCI.isCalledByLegalizer())
5314        DCI.AddToWorklist(Temp.getNode());
5315      break;
5316    case ISD::SETNE:  // X != Y   -->  (X^Y)
5317      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5318      break;
5319    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
5320    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
5321      Temp = DAG.getNOT(dl, N0, OpVT);
5322      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5323      if (!DCI.isCalledByLegalizer())
5324        DCI.AddToWorklist(Temp.getNode());
5325      break;
5326    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
5327    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
5328      Temp = DAG.getNOT(dl, N1, OpVT);
5329      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5330      if (!DCI.isCalledByLegalizer())
5331        DCI.AddToWorklist(Temp.getNode());
5332      break;
5333    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
5334    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
5335      Temp = DAG.getNOT(dl, N0, OpVT);
5336      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5337      if (!DCI.isCalledByLegalizer())
5338        DCI.AddToWorklist(Temp.getNode());
5339      break;
5340    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
5341    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
5342      Temp = DAG.getNOT(dl, N1, OpVT);
5343      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5344      break;
5345    }
5346    if (VT.getScalarType() != MVT::i1) {
5347      if (!DCI.isCalledByLegalizer())
5348        DCI.AddToWorklist(N0.getNode());
5349      // FIXME: If running after legalize, we probably can't do this.
5350      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5351      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5352    }
5353    return N0;
5354  }
5355
5356  // Could not fold it.
5357  return SDValue();
5358}
5359
5360/// Returns true (and the GlobalValue and the offset) if the node is a
5361/// GlobalAddress + offset.
5362bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5363                                    int64_t &Offset) const {
5364
5365  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5366
5367  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5368    GA = GASD->getGlobal();
5369    Offset += GASD->getOffset();
5370    return true;
5371  }
5372
5373  if (N->getOpcode() == ISD::ADD) {
5374    SDValue N1 = N->getOperand(0);
5375    SDValue N2 = N->getOperand(1);
5376    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5377      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5378        Offset += V->getSExtValue();
5379        return true;
5380      }
5381    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5382      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5383        Offset += V->getSExtValue();
5384        return true;
5385      }
5386    }
5387  }
5388
5389  return false;
5390}
5391
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization. Targets override this hook to
  // combine target-specific nodes; an empty SDValue means "no change".
  return SDValue();
}
5397
5398//===----------------------------------------------------------------------===//
5399//  Inline Assembler Implementation Methods
5400//===----------------------------------------------------------------------===//
5401
5402TargetLowering::ConstraintType
5403TargetLowering::getConstraintType(StringRef Constraint) const {
5404  unsigned S = Constraint.size();
5405
5406  if (S == 1) {
5407    switch (Constraint[0]) {
5408    default: break;
5409    case 'r':
5410      return C_RegisterClass;
5411    case 'm': // memory
5412    case 'o': // offsetable
5413    case 'V': // not offsetable
5414      return C_Memory;
5415    case 'p': // Address.
5416      return C_Address;
5417    case 'n': // Simple Integer
5418    case 'E': // Floating Point Constant
5419    case 'F': // Floating Point Constant
5420      return C_Immediate;
5421    case 'i': // Simple Integer or Relocatable Constant
5422    case 's': // Relocatable Constant
5423    case 'X': // Allow ANY value.
5424    case 'I': // Target registers.
5425    case 'J':
5426    case 'K':
5427    case 'L':
5428    case 'M':
5429    case 'N':
5430    case 'O':
5431    case 'P':
5432    case '<':
5433    case '>':
5434      return C_Other;
5435    }
5436  }
5437
5438  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5439    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5440      return C_Memory;
5441    return C_Register;
5442  }
5443  return C_Unknown;
5444}
5445
5446/// Try to replace an X constraint, which matches anything, with another that
5447/// has more specific requirements based on the type of the corresponding
5448/// operand.
5449const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5450  if (ConstraintVT.isInteger())
5451    return "r";
5452  if (ConstraintVT.isFloatingPoint())
5453    return "f"; // works for many targets
5454  return nullptr;
5455}
5456
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Default: this target does not lower asm output constraints specially.
  // Returning an empty SDValue tells the caller the output was not handled.
  return SDValue();
}
5462
5463/// Lower the specified operand into the Ops vector.
5464/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; multi-letter ones are
  // target-specific and left for subclasses.
  if (Constraint.size() > 1)
    return;

  // NOTE(review): Constraint is assumed non-empty at this point — confirm
  // callers never pass an empty constraint string.
  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A plain integer constant satisfies 'X', 'i' and 'n' (but not 's',
      // which requires a relocatable symbol).
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      // Symbolic operands satisfy everything except 'n' (pure integer).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          // Fold any offset accumulated from surrounding ADD/SUB nodes into
          // the target global address.
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one layer of (sym +/- C) and keep looking at the non-constant
      // operand, accumulating the constant into Offset.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Not a constant, symbol, or peelable ADD/SUB: give up (invalid
      // operands add nothing to Ops).
      return;
    }
    break;
  }
  }
}
5541
// Default: no extra operands. Targets whose intrinsics need additional
// SDValue operands override this hook.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5545
5546std::pair<unsigned, const TargetRegisterClass *>
5547TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5548                                             StringRef Constraint,
5549                                             MVT VT) const {
5550  if (Constraint.empty() || Constraint[0] != '{')
5551    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5552  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5553
5554  // Remove the braces from around the name.
5555  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5556
5557  std::pair<unsigned, const TargetRegisterClass *> R =
5558      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5559
5560  // Figure out which register class contains this reg.
5561  for (const TargetRegisterClass *RC : RI->regclasses()) {
5562    // If none of the value types for this register class are valid, we
5563    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5564    if (!isLegalRC(*RI, *RC))
5565      continue;
5566
5567    for (const MCPhysReg &PR : *RC) {
5568      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5569        std::pair<unsigned, const TargetRegisterClass *> S =
5570            std::make_pair(PR, RC);
5571
5572        // If this register class has the requested value type, return it,
5573        // otherwise keep searching and return the first class found
5574        // if no other is found which explicitly has the requested type.
5575        if (RI->isTypeLegalForClass(*RC, VT))
5576          return S;
5577        if (!R.second)
5578          R = S;
5579      }
5580    }
5581  }
5582
5583  return R;
5584}
5585
5586//===----------------------------------------------------------------------===//
5587// Constraint Selection.
5588
5589/// Return true of this is an input operand that is a matching constraint like
5590/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // Matching constraints are spelled as a decimal operand number, e.g. "4",
  // so a leading digit identifies them.
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
5595
5596/// If this is an input matching constraint, this method returns the output
5597/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // The constraint code is the decimal index of the matched output operand.
  return atoi(ConstraintCode.c_str());
}
5602
5603/// Split up the constraint string from the inline assembly value into the
5604/// specific constraints and their prefixes, and also tie in the associated
5605/// operand values.
5606/// If this returns an empty vector, and if the constraint string itself
5607/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      // Labels come from a CallBr's indirect destinations, not the argument
      // list; note the 'continue' so ArgNo is not advanced below.
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Derive the constraint's value type from the operand value set above
    // (skipped for non-indirect outputs, whose VT was set from the return
    // type, and clobbers, which have no operand).
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // Any single invalid constraint disqualifies this alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // VTs differ: the pair is still acceptable if both sides resolve to
        // the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5789
5790/// Return a number indicating our preference for chosing a type of constraint
5791/// over another, for the purpose of sorting them. Immediates are almost always
5792/// preferrable (when they can be emitted). A higher return value means a
5793/// stronger preference for one constraint type relative to another.
5794/// FIXME: We should prefer registers over memory but doing so may lead to
5795/// unrecoverable register exhaustion later.
5796/// https://github.com/llvm/llvm-project/issues/20571
5797static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5798  switch (CT) {
5799  case TargetLowering::C_Immediate:
5800  case TargetLowering::C_Other:
5801    return 4;
5802  case TargetLowering::C_Memory:
5803  case TargetLowering::C_Address:
5804    return 3;
5805  case TargetLowering::C_RegisterClass:
5806    return 2;
5807  case TargetLowering::C_Register:
5808    return 1;
5809  case TargetLowering::C_Unknown:
5810    return 0;
5811  }
5812  llvm_unreachable("Invalid constraint type");
5813}
5814
5815/// Examine constraint type and operand type and determine a weight value.
5816/// This object must already have been set up with the operand type
5817/// and the current alternative constraint selected.
5818TargetLowering::ConstraintWeight
5819  TargetLowering::getMultipleConstraintMatchWeight(
5820    AsmOperandInfo &info, int maIndex) const {
5821  InlineAsm::ConstraintCodeVector *rCodes;
5822  if (maIndex >= (int)info.multipleAlternatives.size())
5823    rCodes = &info.Codes;
5824  else
5825    rCodes = &info.multipleAlternatives[maIndex].Codes;
5826  ConstraintWeight BestWeight = CW_Invalid;
5827
5828  // Loop over the options, keeping track of the most general one.
5829  for (const std::string &rCode : *rCodes) {
5830    ConstraintWeight weight =
5831        getSingleConstraintMatchWeight(info, rCode.c_str());
5832    if (weight > BestWeight)
5833      BestWeight = weight;
5834  }
5835
5836  return BestWeight;
5837}
5838
5839/// Examine constraint type and operand type and determine a weight value.
5840/// This object must already have been set up with the operand type
5841/// and the current alternative constraint selected.
5842TargetLowering::ConstraintWeight
5843  TargetLowering::getSingleConstraintMatchWeight(
5844    AsmOperandInfo &info, const char *constraint) const {
5845  ConstraintWeight weight = CW_Invalid;
5846  Value *CallOperandVal = info.CallOperandVal;
5847    // If we don't have a value, we can't do a match,
5848    // but allow it at the lowest weight.
5849  if (!CallOperandVal)
5850    return CW_Default;
5851  // Look at the constraint type.
5852  switch (*constraint) {
5853    case 'i': // immediate integer.
5854    case 'n': // immediate integer with a known value.
5855      if (isa<ConstantInt>(CallOperandVal))
5856        weight = CW_Constant;
5857      break;
5858    case 's': // non-explicit intregal immediate.
5859      if (isa<GlobalValue>(CallOperandVal))
5860        weight = CW_Constant;
5861      break;
5862    case 'E': // immediate float if host format.
5863    case 'F': // immediate float.
5864      if (isa<ConstantFP>(CallOperandVal))
5865        weight = CW_Constant;
5866      break;
5867    case '<': // memory operand with autodecrement.
5868    case '>': // memory operand with autoincrement.
5869    case 'm': // memory operand.
5870    case 'o': // offsettable memory operand
5871    case 'V': // non-offsettable memory operand
5872      weight = CW_Memory;
5873      break;
5874    case 'r': // general register.
5875    case 'g': // general register, memory operand or immediate integer.
5876              // note: Clang converts "g" to "imr".
5877      if (CallOperandVal->getType()->isIntegerTy())
5878        weight = CW_Register;
5879      break;
5880    case 'X': // any operand.
5881  default:
5882    weight = CW_Default;
5883    break;
5884  }
5885  return weight;
5886}
5887
5888/// If there are multiple different constraints that we could pick for this
5889/// operand (e.g. "imr") try to pick the 'best' one.
5890/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5891/// into seven classes:
5892///    Register      -> one specific register
5893///    RegisterClass -> a group of regs
5894///    Memory        -> memory
5895///    Address       -> a symbolic memory reference
5896///    Immediate     -> immediate values
5897///    Other         -> magic values (such as "Flag Output Operands")
5898///    Unknown       -> something we don't recognize yet and can't handle
5899/// Ideally, we would pick the most specific constraint possible: if we have
5900/// something that fits into a register, we would pick it.  The problem here
5901/// is that if we have something that could either be in a register or in
5902/// memory that use of the register could cause selection of *other*
5903/// operands to fail: they might only succeed if we pick memory.  Because of
5904/// this the heuristic we use is:
5905///
5906///  1) If there is an 'other' constraint, and if the operand is valid for
5907///     that constraint, use it.  This makes us take advantage of 'i'
5908///     constraints when available.
5909///  2) Otherwise, pick the most general constraint present.  This prefers
5910///     'm' over 'r', for example.
5911///
5912TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5913    TargetLowering::AsmOperandInfo &OpInfo) const {
5914  ConstraintGroup Ret;
5915
5916  Ret.reserve(OpInfo.Codes.size());
5917  for (StringRef Code : OpInfo.Codes) {
5918    TargetLowering::ConstraintType CType = getConstraintType(Code);
5919
5920    // Indirect 'other' or 'immediate' constraints are not allowed.
5921    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5922                               CType == TargetLowering::C_Register ||
5923                               CType == TargetLowering::C_RegisterClass))
5924      continue;
5925
5926    // Things with matching constraints can only be registers, per gcc
5927    // documentation.  This mainly affects "g" constraints.
5928    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5929      continue;
5930
5931    Ret.emplace_back(Code, CType);
5932  }
5933
5934  std::stable_sort(
5935      Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
5936        return getConstraintPiority(a.second) > getConstraintPiority(b.second);
5937      });
5938
5939  return Ret;
5940}
5941
5942/// If we have an immediate, see if we can lower it. Return true if we can,
5943/// false otherwise.
5944static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5945                                     SDValue Op, SelectionDAG *DAG,
5946                                     const TargetLowering &TLI) {
5947
5948  assert((P.second == TargetLowering::C_Other ||
5949          P.second == TargetLowering::C_Immediate) &&
5950         "need immediate or other");
5951
5952  if (!Op.getNode())
5953    return false;
5954
5955  std::vector<SDValue> ResultOps;
5956  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
5957  return !ResultOps.empty();
5958}
5959
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
/// \p Op may be null; it is only needed when trying to lower immediate
/// constraints. On success the chosen code/type are written into \p OpInfo.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: rank them by preference (immediates/'other' first).
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    // Walk the leading run of immediate/'other' candidates and take the first
    // one that actually lowers for this operand. If the run ends without a
    // match, BestIdx lands on the first non-immediate candidate; if *every*
    // candidate is immediate/'other' and none lowers, fall back to index 0.
    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    // Labels and block addresses can always be materialized as immediates.
    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6017
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Because the division is exact, x/d == x * d^{-1} (mod 2^W), optionally
/// preceded by an exact arithmetic shift for the power-of-two part of d.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per divisor element: split off trailing zeros as a shift amount and
  // compute the remaining odd factor's inverse modulo 2^BitWidth.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; reject so it gets handled elsewhere.
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    // Each iteration doubles the number of correct low bits of Factor.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Re-assemble the collected constants to mirror Op1's shape
  // (build_vector / splat_vector / scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6085
6086SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6087                              SelectionDAG &DAG,
6088                              SmallVectorImpl<SDNode *> &Created) const {
6089  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6090  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6091  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6092    return SDValue(N, 0); // Lower SDIV as SDIV
6093  return SDValue();
6094}
6095
6096SDValue
6097TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6098                              SelectionDAG &DAG,
6099                              SmallVectorImpl<SDNode *> &Created) const {
6100  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6101  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6102  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6103    return SDValue(N, 0); // Lower SREM as SREM
6104  return SDValue();
6105}
6106
6107/// Build sdiv by power-of-2 with conditional move instructions
6108/// Ref: "Hacker's Delight" by Henry Warren 10-1
6109/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6110///   bgez x, label
6111///   add x, x, 2**k-1
6112/// label:
6113///   sra res, x, k
6114///   neg res, res (when the divisor is negative)
6115SDValue TargetLowering::buildSDIVPow2WithCMov(
6116    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6117    SmallVectorImpl<SDNode *> &Created) const {
6118  unsigned Lg2 = Divisor.countr_zero();
6119  EVT VT = N->getValueType(0);
6120
6121  SDLoc DL(N);
6122  SDValue N0 = N->getOperand(0);
6123  SDValue Zero = DAG.getConstant(0, DL, VT);
6124  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6125  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6126
6127  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6128  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6129  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6130  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6131  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6132
6133  Created.push_back(Cmp.getNode());
6134  Created.push_back(Add.getNode());
6135  Created.push_back(CMov.getNode());
6136
6137  // Divide by pow2.
6138  SDValue SRA =
6139      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6140
6141  // If we're dividing by a positive value, we're done.  Otherwise, we must
6142  // negate the result.
6143  if (Divisor.isNonNegative())
6144    return SRA;
6145
6146  Created.push_back(SRA.getNode());
6147  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6148}
6149
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Only set when VT is illegal but promotes to a type with a legal MUL;
  // the widened multiply is then done in MulVT.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per divisor element, compute the signed-division magic constant, the
  // optional numerator add/sub factor, the post-shift, and a mask that
  // disables the final sign fixup for the trivial +1/-1 case.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; reject so it gets handled elsewhere.
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  // Re-assemble the collected constants to mirror N1's shape
  // (build_vector / splat_vector / scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the signed product X*Y, or a null SDValue when
  // no suitable operation is available.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
6311
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Only set when VT is illegal but promotes to a type with a legal MUL;
  // the widened multiply is then done in MulVT.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  // TODO: Support vectors.
  unsigned LeadingZeros = 0;
  if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
    assert(!isOneConstant(N1) && "Unexpected divisor");
    LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
    // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
    // the dividend exceeds the leading zeros for the divisor.
    LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
  }

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Per divisor element, compute the unsigned-division magic constant plus
  // pre/post shift amounts and the "add" (NPQ) fixup selector.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    // Division by zero is undefined; reject so it gets handled elsewhere.
    if (C->isZero())
      return false;
    const APInt& Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      // For the vector NPQ path, MULHU by 2^(EltBits-1) acts as SRL-by-1;
      // a zero factor disables NPQ for that lane.
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  // Re-assemble the collected constants to mirror N1's shape
  // (build_vector / splat_vector / scalar).
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    // NPQFactor is intentionally left unset here: the scalar NPQ path below
    // uses a plain SRL-by-1 instead of a MULHU by NPQFactor.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high half of the unsigned product X*Y, or a null SDValue
  // when no suitable operation is available.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
    if (VT.isVector())
      WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                                VT.getVectorElementCount());
    if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
                      DAG.getShiftAmountConstant(EltBits, WideVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // The magic algorithm does not handle division by one, so select the
  // original numerator for any lane whose divisor is 1.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
6508
6509/// If all values in Values that *don't* match the predicate are same 'splat'
6510/// value, then replace all values with that splat value.
6511/// Else, if AlternativeReplacement was provided, then replace all values that
6512/// do match predicate with AlternativeReplacement value.
6513static void
6514turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6515                          std::function<bool(SDValue)> Predicate,
6516                          SDValue AlternativeReplacement = SDValue()) {
6517  SDValue Replacement;
6518  // Is there a value for which the Predicate does *NOT* match? What is it?
6519  auto SplatValue = llvm::find_if_not(Values, Predicate);
6520  if (SplatValue != Values.end()) {
6521    // Does Values consist only of SplatValue's and values matching Predicate?
6522    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6523          return Value == *SplatValue || Predicate(Value);
6524        })) // Then we shall replace values matching predicate with SplatValue.
6525      Replacement = *SplatValue;
6526  }
6527  if (!Replacement) {
6528    // Oops, we did not find the "baseline" splat value.
6529    if (!AlternativeReplacement)
6530      return; // Nothing to do.
6531    // Let's replace with provided value then.
6532    Replacement = AlternativeReplacement;
6533  }
6534  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6535}
6536
6537/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6538/// where the divisor is constant and the comparison target is zero,
6539/// return a DAG expression that will generate the same comparison result
6540/// using only multiplications, additions and shifts/rotations.
6541/// Ref: "Hacker's Delight" 10-17.
6542SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6543                                        SDValue CompTargetNode,
6544                                        ISD::CondCode Cond,
6545                                        DAGCombinerInfo &DCI,
6546                                        const SDLoc &DL) const {
6547  SmallVector<SDNode *, 5> Built;
6548  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6549                                         DCI, DL, Built)) {
6550    for (SDNode *N : Built)
6551      DCI.AddToWorklist(N);
6552    return Folded;
6553  }
6554
6555  return SDValue();
6556}
6557
/// Worker for buildUREMEqFold. Attempts the fold, appending every node it
/// creates to \p Created. Returns the replacement comparison value, or an
/// empty SDValue if the fold does not apply or is judged unprofitable.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // Profitability / fixup bookkeeping, accumulated across all lanes by the
  // per-lane worker below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  // Per-lane worker: analyze one (divisor, comparison target) pair and append
  // that lane's P/K/Q constants. Returning false aborts the whole fold.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants in whatever form (build_vector,
  // splat_vector, or scalar) matches the divisor operand.
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // If any lane compares with a non-zero, non-tautological constant, fold
  // that constant into the LHS first: (x u% D) == C  <-->  ((x - C) u% D) == 0
  // shape-wise for the multiply trick below.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the VSELECT note above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
6779
6780/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6781/// where the divisor is constant and the comparison target is zero,
6782/// return a DAG expression that will generate the same comparison result
6783/// using only multiplications, additions and shifts/rotations.
6784/// Ref: "Hacker's Delight" 10-17.
6785SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6786                                        SDValue CompTargetNode,
6787                                        ISD::CondCode Cond,
6788                                        DAGCombinerInfo &DCI,
6789                                        const SDLoc &DL) const {
6790  SmallVector<SDNode *, 7> Built;
6791  if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6792                                         DCI, DL, Built)) {
6793    assert(Built.size() <= 7 && "Max size prediction failed.");
6794    for (SDNode *N : Built)
6795      DCI.AddToWorklist(N);
6796    return Folded;
6797  }
6798
6799  return SDValue();
6800}
6801
/// Worker for buildSREMEqFold. Attempts the fold, appending every node it
/// creates to \p Created. Returns the replacement comparison value, or an
/// empty SDValue if the fold does not apply or is judged unprofitable.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  // Profitability / fixup bookkeeping, accumulated across all lanes by the
  // per-lane worker below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Per-lane worker: analyze one divisor and append that lane's P/A/K/Q
  // constants. Returning false aborts the whole fold.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`

    // Note: negating INT_MIN yields INT_MIN again, so it needs the
    // special-case handling flagged here.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants in whatever form (build_vector,
  // splat_vector, or scalar) matches the divisor operand.
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops -- legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
7054
7055bool TargetLowering::
7056verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7057  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7058    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7059                                "be a constant integer");
7060    return true;
7061  }
7062
7063  return false;
7064}
7065
7066SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7067                                         const DenormalMode &Mode) const {
7068  SDLoc DL(Op);
7069  EVT VT = Op.getValueType();
7070  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7071  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7072
7073  // This is specifically a check for the handling of denormal inputs, not the
7074  // result.
7075  if (Mode.Input == DenormalMode::PreserveSign ||
7076      Mode.Input == DenormalMode::PositiveZero) {
7077    // Test = X == 0.0
7078    return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7079  }
7080
7081  // Testing it with denormal inputs to avoid wrong estimate.
7082  //
7083  // Test = fabs(X) < SmallestNormal
7084  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7085  APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7086  SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7087  SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7088  return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7089}
7090
/// Try to build an expression equivalent to "fneg Op", reporting in \p Cost
/// how the negated form compares to negating the original (Cheaper, Neutral,
/// or Expensive). Returns an empty SDValue when no acceptable negated form
/// can be built. \p Depth bounds the recursion.
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  // Helper to drop a speculatively-built negated node that ended up unused,
  // so we don't leave dead nodes in the DAG.
  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    // After legalization, the negated vector is only acceptable if either the
    // opcode/constant forms are legal or every negated element is a legal
    // immediate.
    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    // Rebuild the vector with each constant element sign-flipped; undefs pass
    // through untouched.
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    // Negating an add is only valid when signed zeros may be ignored.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    // For mul/div it suffices to negate exactly one operand.
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    // Negating fma requires ignoring signed zeros, and negating Z plus exactly
    // one of X/Y.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if fail to negate the Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through fp_extend and sin (sin(-x) == -sin(x)).
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    // Same as fp_extend, but the truncation flag operand must be carried over.
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}
7394
7395//===----------------------------------------------------------------------===//
7396// Legalization Utilities
7397//===----------------------------------------------------------------------===//
7398
/// Expand a multiply of type VT (Opcode is ISD::MUL, ISD::UMUL_LOHI or
/// ISD::SMUL_LOHI) into operations on the half-width type HiLoVT, using
/// whichever of MULHS/MULHU/SMUL_LOHI/UMUL_LOHI the target supports on HiLoVT
/// (or any of them, when Kind == MulExpansionKind::Always). The caller may
/// supply the low/high halves of both operands via LL/LH/RL/RH (all four or
/// none); otherwise they are synthesized here with TRUNCATE and SRL. On
/// success, the result pieces are appended to Result (two values for
/// ISD::MUL, four for the LOHI opcodes) and true is returned.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which ways of forming the high half of a HiLoVT multiply are available?
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  // With no way to produce a high half at all, the expansion is impossible.
  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit the Lo/Hi halves of L * R on HiLoVT, preferring a single
  // [SU]MUL_LOHI node and falling back to MUL + MULH[SU]. Returns false when
  // neither form is available for the requested signedness.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Synthesize the low halves of the operands if the caller didn't provide
  // them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  // No low halves available: give up.
  if (!LL.getNode())
    return false;

  // If the upper halves of both inputs are known zero, a single unsigned
  // half-width multiply produces the entire product.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // The LOHI opcodes expect four result pieces; the top two are zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Likewise, if both inputs fit in the signed inner type, a single signed
  // half-width multiply suffices.
  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  // General case: full long multiplication. We now also need the high halves
  // of the operands.
  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL. Its low half is the final low piece.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Only the low OuterBitSize bits are wanted: the high result half is the
    // high half of LL*RL plus the two cross terms, computed modulo
    // 2^InnerBitSize (carries out of these adds cannot affect retained bits).
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Merge combines Lo/Hi HiLoVT halves back into one VT value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  // Accumulate the middle partial products, starting from the high half of
  // LL*RL.
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // Adding the second cross term can carry into the top piece; capture that
  // carry with ADDC/ADDE when the target has them, else with UADDO_CARRY.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // Top partial product: LH * RH (signed for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the captured carry into the high piece.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed fixup of the upper bits: when an operand's high half is
    // negative, subtract the other operand's low half from the accumulated
    // top (standard unsigned-to-signed high-product correction).
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
7573
7574bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7575                               SelectionDAG &DAG, MulExpansionKind Kind,
7576                               SDValue LL, SDValue LH, SDValue RL,
7577                               SDValue RH) const {
7578  SmallVector<SDValue, 2> Result;
7579  bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7580                           N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7581                           DAG, Kind, LL, LH, RL, RH);
7582  if (Ok) {
7583    assert(Result.size() == 2);
7584    Lo = Result[0];
7585    Hi = Result[1];
7586  }
7587  return Ok;
7588}
7589
7590// Optimize unsigned division or remainder by constants for types twice as large
7591// as a legal VT.
7592//
7593// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7594// can be computed
7595// as:
7596//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7597//   Remainder = Sum % Constant
7598// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7599//
7600// For division, we can compute the remainder using the algorithm described
7601// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
7603// (1 << (BitWidth / 2)) to get the quotient.
7604
7605// If Constant is even, we can shift right the dividend and the divisor by the
7606// number of trailing zeros in Constant before applying the remainder algorithm.
7607// If we're after the quotient, we can subtract this value from the shifted
7608// dividend and multiply by the multiplicative inverse of the shifted divisor.
7609// If we want the remainder, we shift the value left by the number of trailing
7610// zeros and add the bits that were shifted out of the dividend.
7611bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7612                                            SmallVectorImpl<SDValue> &Result,
7613                                            EVT HiLoVT, SelectionDAG &DAG,
7614                                            SDValue LL, SDValue LH) const {
7615  unsigned Opcode = N->getOpcode();
7616  EVT VT = N->getValueType(0);
7617
7618  // TODO: Support signed division/remainder.
7619  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7620    return false;
7621  assert(
7622      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7623      "Unexpected opcode");
7624
7625  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7626  if (!CN)
7627    return false;
7628
7629  APInt Divisor = CN->getAPIntValue();
7630  unsigned BitWidth = Divisor.getBitWidth();
7631  unsigned HBitWidth = BitWidth / 2;
7632  assert(VT.getScalarSizeInBits() == BitWidth &&
7633         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7634
7635  // Divisor needs to less than (1 << HBitWidth).
7636  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7637  if (Divisor.uge(HalfMaxPlus1))
7638    return false;
7639
7640  // We depend on the UREM by constant optimization in DAGCombiner that requires
7641  // high multiply.
7642  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7643      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7644    return false;
7645
7646  // Don't expand if optimizing for size.
7647  if (DAG.shouldOptForSize())
7648    return false;
7649
7650  // Early out for 0 or 1 divisors.
7651  if (Divisor.ule(1))
7652    return false;
7653
7654  // If the divisor is even, shift it until it becomes odd.
7655  unsigned TrailingZeros = 0;
7656  if (!Divisor[0]) {
7657    TrailingZeros = Divisor.countr_zero();
7658    Divisor.lshrInPlace(TrailingZeros);
7659  }
7660
7661  SDLoc dl(N);
7662  SDValue Sum;
7663  SDValue PartialRem;
7664
7665  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7666  // then add in the carry.
7667  // TODO: If we can't split it in half, we might be able to split into 3 or
7668  // more pieces using a smaller bit width.
7669  if (HalfMaxPlus1.urem(Divisor).isOne()) {
7670    assert(!LL == !LH && "Expected both input halves or no input halves!");
7671    if (!LL)
7672      std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7673
7674    // Shift the input by the number of TrailingZeros in the divisor. The
7675    // shifted out bits will be added to the remainder later.
7676    if (TrailingZeros) {
7677      // Save the shifted off bits if we need the remainder.
7678      if (Opcode != ISD::UDIV) {
7679        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7680        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7681                                 DAG.getConstant(Mask, dl, HiLoVT));
7682      }
7683
7684      LL = DAG.getNode(
7685          ISD::OR, dl, HiLoVT,
7686          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7687                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7688          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7689                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7690                                                 HiLoVT, dl)));
7691      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7692                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7693    }
7694
7695    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7696    EVT SetCCType =
7697        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7698    if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7699      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7700      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7701      Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7702                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7703    } else {
7704      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7705      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7706      // If the boolean for the target is 0 or 1, we can add the setcc result
7707      // directly.
7708      if (getBooleanContents(HiLoVT) ==
7709          TargetLoweringBase::ZeroOrOneBooleanContent)
7710        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7711      else
7712        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7713                              DAG.getConstant(0, dl, HiLoVT));
7714      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7715    }
7716  }
7717
7718  // If we didn't find a sum, we can't do the expansion.
7719  if (!Sum)
7720    return false;
7721
7722  // Perform a HiLoVT urem on the Sum using truncated divisor.
7723  SDValue RemL =
7724      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7725                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7726  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7727
7728  if (Opcode != ISD::UREM) {
7729    // Subtract the remainder from the shifted dividend.
7730    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7731    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7732
7733    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7734
7735    // Multiply by the multiplicative inverse of the divisor modulo
7736    // (1 << BitWidth).
7737    APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
7738    APInt MulFactor = Divisor.zext(BitWidth + 1);
7739    MulFactor = MulFactor.multiplicativeInverse(Mod);
7740    MulFactor = MulFactor.trunc(BitWidth);
7741
7742    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7743                                   DAG.getConstant(MulFactor, dl, VT));
7744
7745    // Split the quotient into low and high parts.
7746    SDValue QuotL, QuotH;
7747    std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7748    Result.push_back(QuotL);
7749    Result.push_back(QuotH);
7750  }
7751
7752  if (Opcode != ISD::UDIV) {
7753    // If we shifted the input, shift the remainder left and add the bits we
7754    // shifted off the input.
7755    if (TrailingZeros) {
7756      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7757      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7758                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7759      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7760    }
7761    Result.push_back(RemL);
7762    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7763  }
7764
7765  return true;
7766}
7767
7768// Check that (every element of) Z is undef or not an exact multiple of BW.
7769static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7770  return ISD::matchUnaryPredicate(
7771      Z,
7772      [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7773      true);
7774}
7775
/// Expand a VP_FSHL/VP_FSHR funnel shift into VP shift, logic and OR nodes,
/// threading the mask and explicit-vector-length operands through every node
/// created.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);
  SDValue Mask = Node->getOperand(3);
  SDValue VL = Node->getOperand(4);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // Z % BW may be zero, so BW - (Z % BW) could equal BW — an out-of-range
    // shift. Split the complementary shift into a shift by 1 followed by a
    // shift by BW - 1 - (Z % BW), both of which are always in range:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      // Non-power-of-two width: reduce with UREM and subtract from BW - 1.
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
7832
/// Expand an ISD::FSHL/FSHR (or their VP forms) node into supported shift,
/// subtract and OR operations. Returns an empty SDValue when a vector funnel
/// shift cannot be expanded with legal/custom component operations.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);

  // Vector expansion requires all component operations to be available on
  // the vector type.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): Zero and Z have type ShVT but this SUB is created with
      // VT; that is only consistent when ShVT == VT — confirm.
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // Z % BW may be zero, so BW - (Z % BW) could equal BW — an out-of-range
    // shift. Split the complementary shift into a shift by 1 followed by a
    // shift by BW - 1 - (Z % BW), both of which are always in range:
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
7920
// TODO: Merge with expandFunnelShift.
/// Expand ISD::ROTL/ROTR into shifts, masks and an OR. When AllowVectorOps is
/// false, a vector rotate is expanded only if all component operations are
/// legal/custom (or promotable, for the logic ops) on the vector type.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  // rot(l|r) x, c -> rot(r|l) x, -c (valid for power-of-two widths).
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction; HsOpc recovers the bits that wrap
  // around from the other end.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // Non-power-of-two width: reduce with UREM, and pre-shift the wrapped
    // bits by 1 so the second amount (w - 1 - c%w) is always in range even
    // when c % w == 0.
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
7976
/// Expand SHL_PARTS/SRL_PARTS/SRA_PARTS: a double-wide shift whose value is
/// given as separate Lo/Hi operands of type VT and whose amount may be up to
/// 2*VTBits-1. The cross-part bits come from an FSHL/FSHR node; final selects
/// handle shift amounts >= VTBits.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Tmp1 fills the part the source is shifted entirely out of: replicated
  // sign bits for SRA_PARTS, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  // Tmp2 combines bits from both parts (valid for amounts < VTBits); Tmp3 is
  // the plain single-part shift.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  // Cond is true when the amount's VTBits bit is set, i.e. amount >= VTBits.
  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8028
/// Expand FP_TO_SINT using only integer bit manipulation: decompose the float
/// into sign, exponent and mantissa fields and shift the significand into
/// place. Currently limited to f32 -> i64 and to non-strict nodes. Returns
/// true and sets Result on success.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8 exponent bits starting at
  // bit 23, exponent bias 127, 23 explicit mantissa bits.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw bits.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = all-ones if the input is negative, zero otherwise (arithmetic
  // shift of the isolated sign bit), sign-extended to the result type.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading one (bit 23) restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Shift the significand into position: left when the unbiased exponent
  // exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |input| < 1, which truncates to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8099
// Expand [STRICT_]FP_TO_UINT in terms of [STRICT_]FP_TO_SINT: values below
// 2^(DstBits-1) convert directly; larger values are offset into signed range
// first and the sign bit is restored afterwards. Returns false when the
// required operations are not available, true with Result (and Chain for
// strict nodes) populated otherwise.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // For strict nodes operand 0 is the chain, so the FP source is operand 1.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  // Cst is 2^(DstBits-1) as a source-type float (APF was filled from SignMask
  // above); sources below it fit in a plain signed conversion.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    // Signaling compare: a strict conversion must raise on a NaN input here.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Thread the chain through FSUB and FP_TO_SINT to keep FP-exception
      // ordering for the strict form.
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR with the sign mask implements the "+ 0x8000..." of the pseudocode:
    // after the offsetting subtract the sint result has the sign bit clear.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8201
8202bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8203                                      SDValue &Chain,
8204                                      SelectionDAG &DAG) const {
8205  // This transform is not correct for converting 0 when rounding mode is set
8206  // to round toward negative infinity which will produce -0.0. So disable under
8207  // strictfp.
8208  if (Node->isStrictFPOpcode())
8209    return false;
8210
8211  SDValue Src = Node->getOperand(0);
8212  EVT SrcVT = Src.getValueType();
8213  EVT DstVT = Node->getValueType(0);
8214
8215  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8216    return false;
8217
8218  // Only expand vector types if we have the appropriate vector bit operations.
8219  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8220                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8221                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8222                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8223                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8224    return false;
8225
8226  SDLoc dl(SDValue(Node, 0));
8227  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8228
8229  // Implementation of unsigned i64 to f64 following the algorithm in
8230  // __floatundidf in compiler_rt.  This implementation performs rounding
8231  // correctly in all rounding modes with the exception of converting 0
8232  // when rounding toward negative infinity. In that case the fsub will produce
8233  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
8234  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8235  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8236      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8237  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8238  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8239  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8240
8241  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8242  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8243  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8244  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8245  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8246  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8247  SDValue HiSub =
8248      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8249  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8250  return true;
8251}
8252
8253SDValue
8254TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8255                                               SelectionDAG &DAG) const {
8256  unsigned Opcode = Node->getOpcode();
8257  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8258          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8259         "Wrong opcode");
8260
8261  if (Node->getFlags().hasNoNaNs()) {
8262    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8263    SDValue Op1 = Node->getOperand(0);
8264    SDValue Op2 = Node->getOperand(1);
8265    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8266    // Copy FMF flags, but always set the no-signed-zeros flag
8267    // as this is implied by the FMINNUM/FMAXNUM semantics.
8268    SDNodeFlags Flags = Node->getFlags();
8269    Flags.setNoSignedZeros(true);
8270    SelCC->setFlags(Flags);
8271    return SelCC;
8272  }
8273
8274  return SDValue();
8275}
8276
8277SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8278                                              SelectionDAG &DAG) const {
8279  SDLoc dl(Node);
8280  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8281    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8282  EVT VT = Node->getValueType(0);
8283
8284  if (VT.isScalableVector())
8285    report_fatal_error(
8286        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8287
8288  if (isOperationLegalOrCustom(NewOp, VT)) {
8289    SDValue Quiet0 = Node->getOperand(0);
8290    SDValue Quiet1 = Node->getOperand(1);
8291
8292    if (!Node->getFlags().hasNoNaNs()) {
8293      // Insert canonicalizes if it's possible we need to quiet to get correct
8294      // sNaN behavior.
8295      if (!DAG.isKnownNeverSNaN(Quiet0)) {
8296        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8297                             Node->getFlags());
8298      }
8299      if (!DAG.isKnownNeverSNaN(Quiet1)) {
8300        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8301                             Node->getFlags());
8302      }
8303    }
8304
8305    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8306  }
8307
8308  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8309  // instead if there are no NaNs and there can't be an incompatible zero
8310  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8311  if ((Node->getFlags().hasNoNaNs() ||
8312       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8313        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8314      (Node->getFlags().hasNoSignedZeros() ||
8315       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8316       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8317    unsigned IEEE2018Op =
8318        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8319    if (isOperationLegalOrCustom(IEEE2018Op, VT))
8320      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8321                         Node->getOperand(1), Node->getFlags());
8322  }
8323
8324  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8325    return SelCC;
8326
8327  return SDValue();
8328}
8329
8330/// Returns a true value if if this FPClassTest can be performed with an ordered
8331/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8332/// std::nullopt if it cannot be performed as a compare with 0.
8333static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8334                                           const fltSemantics &Semantics,
8335                                           const MachineFunction &MF) {
8336  FPClassTest OrderedMask = Test & ~fcNan;
8337  FPClassTest NanTest = Test & fcNan;
8338  bool IsOrdered = NanTest == fcNone;
8339  bool IsUnordered = NanTest == fcNan;
8340
8341  // Skip cases that are testing for only a qnan or snan.
8342  if (!IsOrdered && !IsUnordered)
8343    return std::nullopt;
8344
8345  if (OrderedMask == fcZero &&
8346      MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8347    return IsOrdered;
8348  if (OrderedMask == (fcZero | fcSubnormal) &&
8349      MF.getDenormalMode(Semantics).inputsAreZero())
8350    return IsOrdered;
8351  return std::nullopt;
8352}
8353
// Expand an IS_FPCLASS test of the classes in \p Test against \p Op into a
// DAG of compares and bit tests, preferring cheap float compares when FP
// exceptions may be ignored and falling back to integer bit manipulation of
// the value's encoding otherwise.
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         FPClassTest Test, SDNodeFlags Flags,
                                         const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if ((Test & fcAllFlags) == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;
  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
    IsInverted = true;
    Test = InvertedCheck;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) as an unordered self-compare: x != x only holds for NaN.
    if (Test == fcNan &&
        isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
                                OperandVT.getScalarType().getSimpleVT())) {
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInverted ? ISD::SETO : ISD::SETUO);
    }

    if (Test == fcInf &&
        isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
                                OperandVT.getScalarType().getSimpleVT()) &&
        isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
    }
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  // OR each partial class-check into the accumulated result.
  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  // Lazily built and cached so multiple class checks share one test.
  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  // NOTE(review): the 0.0 literal here is implicitly converted to the integer
  // 0 that getConstant expects; an integer literal 0 would say this directly.
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // No partial check produced a result: the answer is a constant (taking the
  // earlier inversion into account).
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
8614
8615// Only expand vector types if we have the appropriate vector bit operations.
8616static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8617  assert(VT.isVector() && "Expected vector type");
8618  unsigned Len = VT.getScalarSizeInBits();
8619  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8620         TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
8621         TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
8622         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8623         TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
8624}
8625
8626SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8627  SDLoc dl(Node);
8628  EVT VT = Node->getValueType(0);
8629  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8630  SDValue Op = Node->getOperand(0);
8631  unsigned Len = VT.getScalarSizeInBits();
8632  assert(VT.isInteger() && "CTPOP not implemented for this type.");
8633
8634  // TODO: Add support for irregular type lengths.
8635  if (!(Len <= 128 && Len % 8 == 0))
8636    return SDValue();
8637
8638  // Only expand vector types if we have the appropriate vector bit operations.
8639  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8640    return SDValue();
8641
8642  // This is the "best" algorithm from
8643  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8644  SDValue Mask55 =
8645      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8646  SDValue Mask33 =
8647      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8648  SDValue Mask0F =
8649      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8650
8651  // v = v - ((v >> 1) & 0x55555555...)
8652  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8653                   DAG.getNode(ISD::AND, dl, VT,
8654                               DAG.getNode(ISD::SRL, dl, VT, Op,
8655                                           DAG.getConstant(1, dl, ShVT)),
8656                               Mask55));
8657  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8658  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8659                   DAG.getNode(ISD::AND, dl, VT,
8660                               DAG.getNode(ISD::SRL, dl, VT, Op,
8661                                           DAG.getConstant(2, dl, ShVT)),
8662                               Mask33));
8663  // v = (v + (v >> 4)) & 0x0F0F0F0F...
8664  Op = DAG.getNode(ISD::AND, dl, VT,
8665                   DAG.getNode(ISD::ADD, dl, VT, Op,
8666                               DAG.getNode(ISD::SRL, dl, VT, Op,
8667                                           DAG.getConstant(4, dl, ShVT))),
8668                   Mask0F);
8669
8670  if (Len <= 8)
8671    return Op;
8672
8673  // Avoid the multiply if we only have 2 bytes to add.
8674  // TODO: Only doing this for scalars because vectors weren't as obviously
8675  // improved.
8676  if (Len == 16 && !VT.isVector()) {
8677    // v = (v + (v >> 8)) & 0x00FF;
8678    return DAG.getNode(ISD::AND, dl, VT,
8679                     DAG.getNode(ISD::ADD, dl, VT, Op,
8680                                 DAG.getNode(ISD::SRL, dl, VT, Op,
8681                                             DAG.getConstant(8, dl, ShVT))),
8682                     DAG.getConstant(0xFF, dl, VT));
8683  }
8684
8685  // v = (v * 0x01010101...) >> (Len - 8)
8686  SDValue Mask01 =
8687      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8688  return DAG.getNode(ISD::SRL, dl, VT,
8689                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
8690                     DAG.getConstant(Len - 8, dl, ShVT));
8691}
8692
8693SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8694  SDLoc dl(Node);
8695  EVT VT = Node->getValueType(0);
8696  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8697  SDValue Op = Node->getOperand(0);
8698  SDValue Mask = Node->getOperand(1);
8699  SDValue VL = Node->getOperand(2);
8700  unsigned Len = VT.getScalarSizeInBits();
8701  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8702
8703  // TODO: Add support for irregular type lengths.
8704  if (!(Len <= 128 && Len % 8 == 0))
8705    return SDValue();
8706
8707  // This is same algorithm of expandCTPOP from
8708  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8709  SDValue Mask55 =
8710      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8711  SDValue Mask33 =
8712      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8713  SDValue Mask0F =
8714      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8715
8716  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8717
8718  // v = v - ((v >> 1) & 0x55555555...)
8719  Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8720                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8721                                 DAG.getConstant(1, dl, ShVT), Mask, VL),
8722                     Mask55, Mask, VL);
8723  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8724
8725  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8726  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8727  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8728                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8729                                 DAG.getConstant(2, dl, ShVT), Mask, VL),
8730                     Mask33, Mask, VL);
8731  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8732
8733  // v = (v + (v >> 4)) & 0x0F0F0F0F...
8734  Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8735                     Mask, VL),
8736  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8737  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8738
8739  if (Len <= 8)
8740    return Op;
8741
8742  // v = (v * 0x01010101...) >> (Len - 8)
8743  SDValue Mask01 =
8744      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8745  return DAG.getNode(ISD::VP_LSHR, dl, VT,
8746                     DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
8747                     DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
8748}
8749
/// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF into simpler operations, preferring
/// whichever CTLZ variant the target supports, and otherwise falling back to a
/// bit-smearing sequence followed by CTPOP.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // ctlz(0) is defined to be the element bit width; select it explicitly
    // since the ZERO_UNDEF form leaves that case undefined.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    // Smear the highest set bit into all lower positions; after the loop,
    // ~x has exactly one set bit per original leading zero.
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
8799
8800SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8801  SDLoc dl(Node);
8802  EVT VT = Node->getValueType(0);
8803  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8804  SDValue Op = Node->getOperand(0);
8805  SDValue Mask = Node->getOperand(1);
8806  SDValue VL = Node->getOperand(2);
8807  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8808
8809  // do this:
8810  // x = x | (x >> 1);
8811  // x = x | (x >> 2);
8812  // ...
8813  // x = x | (x >>16);
8814  // x = x | (x >>32); // for 64-bit input
8815  // return popcount(~x);
8816  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8817    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8818    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8819                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
8820                     VL);
8821  }
8822  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8823                   VL);
8824  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8825}
8826
/// Lower CTTZ via a De Bruijn multiply + constant-pool table lookup:
/// isolate the lowest set bit with (x & -x), multiply by a De Bruijn
/// constant so the top bits uniquely identify the bit position, then use
/// those bits to index a byte table loaded from the constant pool.
/// Only 32- and 64-bit widths are supported; returns an empty SDValue
/// otherwise.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();
  // De Bruijn sequences: every BitWidth-bit rotation window is unique, so
  // (x & -x) * DeBruijn >> (BitWidth - log2(BitWidth)) is a perfect hash of
  // the trailing-zero count.
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  // (x & -x) isolates the lowest set bit of x.
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  // Build the inverse table: Table[hash(bit i)] = i.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  // Zero-extending i8 load of the table entry at CPIdx + Lookup.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);
  // CTTZ_ZERO_UNDEF needs no zero handling; plain CTTZ must return BitWidth
  // for a zero input, so select it explicitly.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
8870
/// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF, trying in order: a supported
/// CTTZ variant, a De Bruijn table lookup (scalar only), and finally the
/// popcount(~x & (x - 1)) identity (or the CTLZ-based equivalent).
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    // cttz(0) is defined to be the element bit width; select it explicitly.
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }

  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
8926
8927SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8928  SDValue Op = Node->getOperand(0);
8929  SDValue Mask = Node->getOperand(1);
8930  SDValue VL = Node->getOperand(2);
8931  SDLoc dl(Node);
8932  EVT VT = Node->getValueType(0);
8933
8934  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
8935  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
8936                            DAG.getConstant(-1, dl, VT), Mask, VL);
8937  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
8938                                 DAG.getConstant(1, dl, VT), Mask, VL);
8939  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
8940  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
8941}
8942
8943SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
8944                                  bool IsNegative) const {
8945  SDLoc dl(N);
8946  EVT VT = N->getValueType(0);
8947  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8948  SDValue Op = N->getOperand(0);
8949
8950  // abs(x) -> smax(x,sub(0,x))
8951  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8952      isOperationLegal(ISD::SMAX, VT)) {
8953    SDValue Zero = DAG.getConstant(0, dl, VT);
8954    return DAG.getNode(ISD::SMAX, dl, VT, Op,
8955                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8956  }
8957
8958  // abs(x) -> umin(x,sub(0,x))
8959  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8960      isOperationLegal(ISD::UMIN, VT)) {
8961    SDValue Zero = DAG.getConstant(0, dl, VT);
8962    Op = DAG.getFreeze(Op);
8963    return DAG.getNode(ISD::UMIN, dl, VT, Op,
8964                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8965  }
8966
8967  // 0 - abs(x) -> smin(x, sub(0,x))
8968  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
8969      isOperationLegal(ISD::SMIN, VT)) {
8970    Op = DAG.getFreeze(Op);
8971    SDValue Zero = DAG.getConstant(0, dl, VT);
8972    return DAG.getNode(ISD::SMIN, dl, VT, Op,
8973                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8974  }
8975
8976  // Only expand vector types if we have the appropriate vector operations.
8977  if (VT.isVector() &&
8978      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
8979       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
8980       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
8981       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
8982    return SDValue();
8983
8984  Op = DAG.getFreeze(Op);
8985  SDValue Shift =
8986      DAG.getNode(ISD::SRA, dl, VT, Op,
8987                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
8988  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
8989
8990  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
8991  if (!IsNegative)
8992    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
8993
8994  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
8995  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
8996}
8997
8998SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
8999  SDLoc dl(N);
9000  EVT VT = N->getValueType(0);
9001  SDValue LHS = DAG.getFreeze(N->getOperand(0));
9002  SDValue RHS = DAG.getFreeze(N->getOperand(1));
9003  bool IsSigned = N->getOpcode() == ISD::ABDS;
9004
9005  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9006  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9007  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9008  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9009  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9010    SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9011    SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9012    return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9013  }
9014
9015  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9016  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9017    return DAG.getNode(ISD::OR, dl, VT,
9018                       DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9019                       DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9020
9021  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9022  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9023  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9024  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9025  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9026  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9027                       DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9028}
9029
/// Expand ISD::BSWAP into shifts, masks and ORs. Only simple scalar element
/// types i16/i32/i64 are handled; anything else returns an empty SDValue.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    // Move each of the 4 bytes into place (TmpN holds the byte destined for
    // position N counted from the MSB), then OR them together pairwise.
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    // Same scheme for 8 bytes: isolate each source byte with an AND mask,
    // shift it to its mirrored position, then OR the results in a tree.
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
9087
/// Expand ISD::VP_BSWAP into VP shifts, masks and ORs, threading the mask and
/// explicit vector length through every node. Mirrors expandBSWAP; only
/// i16/i32/i64 scalar element types are handled.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // No VP rotate: build the byte swap from a shift-left/shift-right pair.
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    // Move each of the 4 bytes into its mirrored position, then OR pairwise.
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    // Same scheme for 8 bytes: isolate each source byte with an AND mask,
    // shift it to its mirrored position, then OR the results in a tree.
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
9163
/// Expand ISD::BITREVERSE. For power-of-two widths >= 8 bits this uses BSWAP
/// plus three mask-and-shift stages (nibbles, bit-pairs, single bits); other
/// widths fall back to moving every bit individually.
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  // Fallback for odd sizes: move each bit from position I to its mirrored
  // position J = Sz-1-I and OR everything together.
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    // Keep only the bit that landed at position J.
    APInt Shift = APInt::getOneBitSet(Sz, J);
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}
9224
/// Expand ISD::VP_BITREVERSE, mirroring the power-of-two path of
/// expandBITREVERSE with VP nodes (mask and EVL threaded through). Non
/// power-of-two / sub-byte widths are not handled and return an empty
/// SDValue.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  // No per-bit fallback for the VP form.
  return SDValue();
}
9286
/// Turn a vector load into scalar operations. For byte-sized elements this
/// emits one extending scalar load per element; for sub-byte elements it
/// loads the whole vector as one integer and extracts elements with
/// shift/mask. Returns the built vector value and the output chain.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // Scalarization requires a known, fixed element count.
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    // Mask covering one element's worth of low bits in the wide load type.
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 sits in the high bits of the
      // integer, so mirror the index before computing the shift amount.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      // Apply the load's extension (sign/zero/any) to reach the result
      // element type.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  // Byte-sized elements: emit one extending scalar load per element, each at
  // the next Stride-byte offset, and join the chains with a TokenFactor.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}
9377
/// Turn a vector store into scalar operations. For byte-sized memory element
/// types this emits one truncating scalar store per element; for sub-byte
/// elements it packs all elements into a single integer and stores that.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // Scalarization requires a known, fixed element count.
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    // Extract each element, truncate to the memory element width, and OR it
    // into its slot of the packed integer.
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 goes in the high bits, so mirror the
      // index before computing the shift amount.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  // Join all the store chains so consumers depend on every element store.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
9454
/// Expand a load whose natural alignment is not supported by the target into
/// a sequence of legal operations. Returns the loaded value and the output
/// chain as a pair; callers are expected to wrap these in a MERGE_VALUES
/// node.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        // Re-apply the original load's extension to reach the result type.
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces needed to cover LoadedBytes, rounded
    // up; the final (possibly partial) piece is handled after the loop.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    // Little-endian: the low half lives at the base address.
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    // Big-endian: the high half lives at the base address.
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                             Hi.getValue(1));

  return std::make_pair(Result, TF);
}
9606
/// Expand a store whose natural alignment is not supported by the target
/// into a sequence of legal operations. Returns the new output chain.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-sized pieces needed to cover StoredBytes, rounded
    // up; the final (possibly partial) piece is handled after the loop.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        ISD::AND, dl, VT, Lo,
        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
                        VT));
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; on little-endian the low half goes at the base
  // address, on big-endian the high half does.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
9739
9740SDValue
9741TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9742                                       const SDLoc &DL, EVT DataVT,
9743                                       SelectionDAG &DAG,
9744                                       bool IsCompressedMemory) const {
9745  SDValue Increment;
9746  EVT AddrVT = Addr.getValueType();
9747  EVT MaskVT = Mask.getValueType();
9748  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9749         "Incompatible types of Data and Mask");
9750  if (IsCompressedMemory) {
9751    if (DataVT.isScalableVector())
9752      report_fatal_error(
9753          "Cannot currently handle compressed memory with scalable vectors");
9754    // Incrementing the pointer according to number of '1's in the mask.
9755    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
9756    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
9757    if (MaskIntVT.getSizeInBits() < 32) {
9758      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9759      MaskIntVT = MVT::i32;
9760    }
9761
9762    // Count '1's with POPCNT.
9763    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
9764    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
9765    // Scale is an element size in bytes.
9766    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
9767                                    AddrVT);
9768    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
9769  } else if (DataVT.isScalableVector()) {
9770    Increment = DAG.getVScale(DL, AddrVT,
9771                              APInt(AddrVT.getFixedSizeInBits(),
9772                                    DataVT.getStoreSize().getKnownMinValue()));
9773  } else
9774    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
9775
9776  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
9777}
9778
9779static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9780                                       EVT VecVT, const SDLoc &dl,
9781                                       ElementCount SubEC) {
9782  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9783         "Cannot index a scalable vector within a fixed-width vector");
9784
9785  unsigned NElts = VecVT.getVectorMinNumElements();
9786  unsigned NumSubElts = SubEC.getKnownMinValue();
9787  EVT IdxVT = Idx.getValueType();
9788
9789  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9790    // If this is a constant index and we know the value plus the number of the
9791    // elements in the subvector minus one is less than the minimum number of
9792    // elements then it's safe to return Idx.
9793    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
9794      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9795        return Idx;
9796    SDValue VS =
9797        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
9798    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9799    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
9800                              DAG.getConstant(NumSubElts, dl, IdxVT));
9801    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
9802  }
9803  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
9804    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
9805    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
9806                       DAG.getConstant(Imm, dl, IdxVT));
9807  }
9808  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9809  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
9810                     DAG.getConstant(MaxIndex, dl, IdxVT));
9811}
9812
9813SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9814                                                SDValue VecPtr, EVT VecVT,
9815                                                SDValue Index) const {
9816  return getVectorSubVecPointer(
9817      DAG, VecPtr, VecVT,
9818      EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
9819      Index);
9820}
9821
/// Compute the address of the sub-vector of type \p SubVecVT starting at
/// element \p Index of the in-memory vector of type \p VecVT based at
/// \p VecPtr. The index is clamped so the access stays within the vector.
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  // Clamp the index so the resulting access cannot run past the vector.
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // For scalable sub-vectors the element index must additionally be scaled
  // by vscale.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  // Byte offset = element index * element size.
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
9851
9852//===----------------------------------------------------------------------===//
9853// Implementation of Emulated TLS Model
9854//===----------------------------------------------------------------------===//
9855
/// Lower a TLS global address under the emulated TLS model.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.xyz" control
  // variable, which must already exist in the module.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower the call to __emutls_get_address with the C calling convention.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
9892
9893SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9894                                                SelectionDAG &DAG) const {
9895  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9896  if (!isCtlzFast())
9897    return SDValue();
9898  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
9899  SDLoc dl(Op);
9900  if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
9901    EVT VT = Op.getOperand(0).getValueType();
9902    SDValue Zext = Op.getOperand(0);
9903    if (VT.bitsLT(MVT::i32)) {
9904      VT = MVT::i32;
9905      Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
9906    }
9907    unsigned Log2b = Log2_32(VT.getSizeInBits());
9908    SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
9909    SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9910                              DAG.getConstant(Log2b, dl, MVT::i32));
9911    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9912  }
9913  return SDValue();
9914}
9915
/// Expand an [SU]{MIN,MAX} node into target-legal operations, preferring
/// cheaper algebraic forms and reusing existing SETCC nodes when possible.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
      getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze Op0 since it is used twice (in the compare and the subtract).
    Op0 = DAG.getFreeze(Op0);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  // PrefCC/AltCC select (Op0, Op1); the commuted variants select (Op1, Op0),
  // which is equivalent for min/max. If no existing SETCC matches, build one
  // using PrefCC.
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(BoolVT);
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
      }
    }
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
                            {Op0, Op1, DAG.getCondCode(CC)})) {
        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
      }
    }
    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
9996
/// Expand a saturating add/sub node ([SU]{ADD,SUB}SAT) into target-legal
/// operations, using min/max identities when available and falling back to
/// an overflow-op + select expansion.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Map each saturating opcode to its overflow-detecting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag is already a mask:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // With all-ones booleans the overflow flag is already a mask:
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
    }
  }

  // General signed case: the saturation value is MIN when the (wrapped)
  // result is non-negative and MAX when it is negative, i.e.
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
10112
10113SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10114  unsigned Opcode = Node->getOpcode();
10115  bool IsSigned = Opcode == ISD::SSHLSAT;
10116  SDValue LHS = Node->getOperand(0);
10117  SDValue RHS = Node->getOperand(1);
10118  EVT VT = LHS.getValueType();
10119  SDLoc dl(Node);
10120
10121  assert((Node->getOpcode() == ISD::SSHLSAT ||
10122          Node->getOpcode() == ISD::USHLSAT) &&
10123          "Expected a SHLSAT opcode");
10124  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10125  assert(VT.isInteger() && "Expected operands to be integers");
10126
10127  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10128    return DAG.UnrollVectorOp(Node);
10129
10130  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10131
10132  unsigned BW = VT.getScalarSizeInBits();
10133  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10134  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10135  SDValue Orig =
10136      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10137
10138  SDValue SatVal;
10139  if (IsSigned) {
10140    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10141    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10142    SDValue Cond =
10143        DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10144    SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10145  } else {
10146    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10147  }
10148  SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10149  return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10150}
10151
// Expand a fixed point multiplication ([US]MULFIX[SAT]) into a double-width
// multiply whose two halves are recombined with a funnel shift, plus
// saturation selects for the SAT variants. Returns SDValue() only for vector
// types with no usable MUL_LOHI/MULH expansion so the caller can unroll.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  // Operand 2 is the (constant) number of fractional bits.
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  // With no fractional bits the fixed point multiply degenerates to a plain
  // (possibly overflow-checked) integer multiply.
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      // Unsigned overflow always saturates towards the maximum value.
      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // Let the caller scalarize the operation instead.
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // All overflow information lives in Hi: compare it against the sign
    // extension of Lo to detect a wide product that doesn't fit.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  //  We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
10294
// Expand a fixed point division ([US]DIVFIX[SAT]) by pre-shifting the
// operands so a plain integer division (rounded towards negative infinity
// when signed) yields the correctly scaled quotient. Returns SDValue() when
// there is not enough headroom in the type, signalling the caller to widen.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Distribute the scale between the two operands: shift the LHS up as much
  // as its headroom allows, and the RHS down by the remainder.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
10381
10382void TargetLowering::expandUADDSUBO(
10383    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10384  SDLoc dl(Node);
10385  SDValue LHS = Node->getOperand(0);
10386  SDValue RHS = Node->getOperand(1);
10387  bool IsAdd = Node->getOpcode() == ISD::UADDO;
10388
10389  // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10390  unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10391  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10392    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10393    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10394                                    { LHS, RHS, CarryIn });
10395    Result = SDValue(NodeCarry.getNode(), 0);
10396    Overflow = SDValue(NodeCarry.getNode(), 1);
10397    return;
10398  }
10399
10400  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10401                            LHS.getValueType(), LHS, RHS);
10402
10403  EVT ResultType = Node->getValueType(1);
10404  EVT SetCCType = getSetCCResultType(
10405      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10406  SDValue SetCC;
10407  if (IsAdd && isOneConstant(RHS)) {
10408    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10409    // the live range of X. We assume comparing with 0 is cheap.
10410    // The general case (X + C) < C is not necessarily beneficial. Although we
10411    // reduce the live range of X, we may introduce the materialization of
10412    // constant C.
10413    SetCC =
10414        DAG.getSetCC(dl, SetCCType, Result,
10415                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10416  } else if (IsAdd && isAllOnesConstant(RHS)) {
10417    // Special case: uaddo X, -1 overflows if X != 0.
10418    SetCC =
10419        DAG.getSetCC(dl, SetCCType, LHS,
10420                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10421  } else {
10422    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10423    SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10424  }
10425  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10426}
10427
10428void TargetLowering::expandSADDSUBO(
10429    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10430  SDLoc dl(Node);
10431  SDValue LHS = Node->getOperand(0);
10432  SDValue RHS = Node->getOperand(1);
10433  bool IsAdd = Node->getOpcode() == ISD::SADDO;
10434
10435  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10436                            LHS.getValueType(), LHS, RHS);
10437
10438  EVT ResultType = Node->getValueType(1);
10439  EVT OType = getSetCCResultType(
10440      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10441
10442  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10443  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10444  if (isOperationLegal(OpcSat, LHS.getValueType())) {
10445    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10446    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10447    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10448    return;
10449  }
10450
10451  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10452
10453  // For an addition, the result should be less than one of the operands (LHS)
10454  // if and only if the other operand (RHS) is negative, otherwise there will
10455  // be overflow.
10456  // For a subtraction, the result should be less than one of the operands
10457  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10458  // otherwise there will be overflow.
10459  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10460  SDValue ConditionRHS =
10461      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10462
10463  Overflow = DAG.getBoolExtOrTrunc(
10464      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10465      ResultType, ResultType);
10466}
10467
// Expand [SU]MULO into a multiply producing both halves of the double-width
// product plus a comparison on the top half. Falls back to a MUL_Ixx libcall
// when no wide multiply is available. Returns false only for vector types
// with no usable expansion, signalling the caller to unroll.
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow happened iff shifting back fails to reproduce LHS.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // Type with twice the scalar width, used to hold the full product.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Opcode triples, indexed by signedness:
  // { high-half multiply, combined lo/hi multiply, widening extension }.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Widen, multiply in the wide type, then split the product.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
        HiLHS = DAG.getConstant(0, dl, VT);
        HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is nonzero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
10611
10612SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
10613  SDLoc dl(Node);
10614  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10615  SDValue Op = Node->getOperand(0);
10616  EVT VT = Op.getValueType();
10617
10618  if (VT.isScalableVector())
10619    report_fatal_error(
10620        "Expanding reductions for scalable vectors is undefined.");
10621
10622  // Try to use a shuffle reduction for power of two vectors.
10623  if (VT.isPow2VectorType()) {
10624    while (VT.getVectorNumElements() > 1) {
10625      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10626      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10627        break;
10628
10629      SDValue Lo, Hi;
10630      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10631      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
10632      VT = HalfVT;
10633    }
10634  }
10635
10636  EVT EltVT = VT.getVectorElementType();
10637  unsigned NumElts = VT.getVectorNumElements();
10638
10639  SmallVector<SDValue, 8> Ops;
10640  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10641
10642  SDValue Res = Ops[0];
10643  for (unsigned i = 1; i < NumElts; i++)
10644    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10645
10646  // Result type may be wider than element type.
10647  if (EltVT != Node->getValueType(0))
10648    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10649  return Res;
10650}
10651
10652SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
10653  SDLoc dl(Node);
10654  SDValue AccOp = Node->getOperand(0);
10655  SDValue VecOp = Node->getOperand(1);
10656  SDNodeFlags Flags = Node->getFlags();
10657
10658  EVT VT = VecOp.getValueType();
10659  EVT EltVT = VT.getVectorElementType();
10660
10661  if (VT.isScalableVector())
10662    report_fatal_error(
10663        "Expanding reductions for scalable vectors is undefined.");
10664
10665  unsigned NumElts = VT.getVectorNumElements();
10666
10667  SmallVector<SDValue, 8> Ops;
10668  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
10669
10670  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10671
10672  SDValue Res = AccOp;
10673  for (unsigned i = 0; i < NumElts; i++)
10674    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
10675
10676  return Res;
10677}
10678
10679bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
10680                               SelectionDAG &DAG) const {
10681  EVT VT = Node->getValueType(0);
10682  SDLoc dl(Node);
10683  bool isSigned = Node->getOpcode() == ISD::SREM;
10684  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10685  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10686  SDValue Dividend = Node->getOperand(0);
10687  SDValue Divisor = Node->getOperand(1);
10688  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
10689    SDVTList VTs = DAG.getVTList(VT, VT);
10690    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
10691    return true;
10692  }
10693  if (isOperationLegalOrCustom(DivOpc, VT)) {
10694    // X % Y -> X-X/Y*Y
10695    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
10696    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
10697    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
10698    return true;
10699  }
10700  return false;
10701}
10702
// Expand FP_TO_[SU]INT_SAT into an ordinary FP_TO_[SU]INT clamped either with
// fminnum/fmaxnum (when the bounds are exactly representable and those ops
// are legal) or with a chain of compare+select nodes. NaN maps to 0 for the
// signed case and to MinInt (i.e. 0) for the unsigned case.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  // Round the integer bounds towards zero when converting to float so that
  // an inexact conversion is detectable via the op status.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    // SETUO is true only when Src is NaN (unordered with itself).
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
10811
/// Expand ISD::VECTOR_SPLICE for scalable vectors by spilling both operands
/// to a stack temporary laid out as CONCAT_VECTORS(V1, V2) and reloading the
/// result from an offset into that slot. Fixed-length splices are expected to
/// have been lowered to VECTOR_SHUFFLE instead (see assert below).
///
/// A non-negative splice index Imm selects elements starting at V1[Imm]; a
/// negative index selects the last -Imm elements of V1 followed by elements
/// of V2.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  // The splice index is a compile-time constant; its sign picks the path
  // below.
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  // Use a reduced alignment for the stack slot; large (scalable) vectors need
  // not force a full natural-ABI-aligned stack object.
  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The stack slot holds both operands back to back, i.e. a vector with twice
  // the element count of the result type.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  // V1's store size is only known as a multiple of vscale, so the byte offset
  // to V2 must be materialized as a VSCALE node rather than a plain constant.
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  // Chain the second store after the first so both are ordered before the
  // reload below.
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  // Negative index: take the last TrailingElts elements of V1, then elements
  // of V2, by loading from (end of V1) - TrailingElts * sizeof(element).
  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // If the requested trailing-element count can exceed the runtime vector
  // length, clamp the byte offset to V1's (vscale-scaled) store size so the
  // load stays within the stack slot.
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
10885
/// Legalize a SETCC condition code that is not natively supported for OpVT.
///
/// Tries, in order: swapping the operands, inverting the condition, and
/// inverting plus swapping. If none of those yields a legal/custom condition,
/// expands the comparison into two legal comparisons combined with AND/OR
/// (e.g. SETONE -> SETOLT || SETOGT, ordered tests via SETO/SETUO).
///
/// On success, LHS/RHS/CC are updated in place: either to a rewritten
/// comparison (CC non-null) or, for the two-setcc expansion, LHS holds the
/// final boolean result and RHS/CC are cleared. NeedInvert tells the caller
/// the produced result must be logically inverted.
///
/// Mask/EVL, when set (both or neither, see assert), select the VP
/// (vector-predicated) node forms. Chain, when set, threads the strict-FP
/// chain through the emitted comparisons; IsSignaling selects signaling
/// compares in that case.
///
/// Returns true if any rewrite was performed, false if CC was already legal.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First preference: the swapped-operand form of the same predicate
    // (e.g. a < b  <=>  b > a) — no inversion needed.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Last resort: split into two comparisons (CC1, CC2) whose results are
    // combined with Opc (AND/OR). The switch below picks the decomposition.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      // unord(a, b) == (a != a) || (b != b), using the unordered-NE compare.
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      // Otherwise express SETUO as NOT(SETO) and fall into the SETO
      // expansion below.
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      // ord(a, b) == (a == a) && (b == b), using the ordered-EQ compare.
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        // Unordered variants OR in the ordering test; ordered ones AND it.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        // CC1 is the ordering-agnostic (integer-style SETcc) form of the
        // predicate: keep the low 3 L/G/E bits, set the don't-care bit.
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    // Emit the two comparisons chosen above, using VP node forms when
    // Mask/EVL are provided.
    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // Strict-FP: merge the chains of both compares for the caller.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    // Signal the combined-result form: LHS is the answer, RHS/CC are gone.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
11039