patch-r262261-llvm-r199975-sparc.diff revision 269012
Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):

  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.

  These all use the compare-and-swap CASA/CASXA instructions.

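The expansion is the classic compare-and-swap retry loop. For illustration, a
minimal C sketch of the 32-bit add case, assuming __sync_val_compare_and_swap
stands in for the CASA instruction (the helper name is hypothetical, and the
membar instructions that SelectionDAG places around the loop are omitted):

  #include <stdint.h>

  static uint32_t
  atomic_add_32_sketch(uint32_t *addr, uint32_t rs2)
  {
          uint32_t val = *addr;                 /* %val0 = load %addr */
          for (;;) {
                  uint32_t upd = val + rs2;     /* %upd = op %val, %rs2 */
                  /* %dest = cas %addr, %upd, %val */
                  uint32_t dest = __sync_val_compare_and_swap(addr, val, upd);
                  if (dest == val)              /* cmp %val, %dest; bne loop */
                          return dest;          /* success: old value */
                  val = dest;                   /* lost a race; retry */
          }
  }
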
Introduced here: http://svnweb.freebsd.org/changeset/base/262261

Index: test/CodeGen/SPARC/atomics.ll
===================================================================
--- test/CodeGen/SPARC/atomics.ll
+++ test/CodeGen/SPARC/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: test_atomic_i32
 ; CHECK:       ld [%o0]
@@ -61,3 +61,84 @@ entry:
   %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
   ret i32 %b
 }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: membar
+; CHECK: add
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: membar
+; CHECK: sub
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_xor_32
+; CHECK: membar
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: membar
+; CHECK: and
+; CHECK-NOT: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_nand_32
+; CHECK: membar
+; CHECK: and
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_max_64
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movg %xcc
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw max i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_umin_32
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movleu %icc
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
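
For context, C source along the following lines is what a front end lowers to
the atomicrmw operations exercised above; the __sync builtins carry the
full-barrier semantics corresponding to the seq_cst ordering in the tests (the
function names here are illustrative):

  #include <stdint.h>

  uint32_t
  fetch_add_32(uint32_t *p, uint32_t v)
  {
          /* becomes: atomicrmw add i32* %p, i32 %v seq_cst */
          return __sync_fetch_and_add(p, v);
  }

  uint64_t
  fetch_and_64(uint64_t *p, uint64_t v)
  {
          /* becomes: atomicrmw and i64* %p, i64 %v seq_cst */
          return __sync_fetch_and_and(p, v);
  }
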
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
 
 } // Predicates = [Is64Bit]
 
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
+    Defs = [ICC] in
+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
+
+  def _32 : Pseudo<(outs IntRegs:$rd),
+                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
+                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
+
+  let Predicates = [Is64Bit] in
+  def _64 : Pseudo<(outs I64Regs:$rd),
+                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
+                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
+}
+
+defm ATOMIC_LOAD_ADD  : AtomicRMW<atomic_load_add_32,  atomic_load_add_64>;
+defm ATOMIC_LOAD_SUB  : AtomicRMW<atomic_load_sub_32,  atomic_load_sub_64>;
+defm ATOMIC_LOAD_AND  : AtomicRMW<atomic_load_and_32,  atomic_load_and_64>;
+defm ATOMIC_LOAD_OR   : AtomicRMW<atomic_load_or_32,   atomic_load_or_64>;
+defm ATOMIC_LOAD_XOR  : AtomicRMW<atomic_load_xor_32,  atomic_load_xor_64>;
+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
+defm ATOMIC_LOAD_MIN  : AtomicRMW<atomic_load_min_32,  atomic_load_min_64>;
+defm ATOMIC_LOAD_MAX  : AtomicRMW<atomic_load_max_32,  atomic_load_max_64>;
+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
+
 // Global addresses, constant pool entries
 let Predicates = [Is64Bit] in {
 
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
 MachineBasicBlock *
 SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned BROpcode;
-  unsigned CC;
-  DebugLoc dl = MI->getDebugLoc();
-  // Figure out the conditional branch opcode to use for this select_cc.
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SELECT_CC!");
   case SP::SELECT_CC_Int_ICC:
@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   case SP::SELECT_CC_FP_ICC:
   case SP::SELECT_CC_DFP_ICC:
   case SP::SELECT_CC_QFP_ICC:
-    BROpcode = SP::BCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::BCOND);
   case SP::SELECT_CC_Int_FCC:
   case SP::SELECT_CC_FP_FCC:
   case SP::SELECT_CC_DFP_FCC:
   case SP::SELECT_CC_QFP_FCC:
-    BROpcode = SP::FBCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::FBCOND);
+
+  case SP::ATOMIC_LOAD_ADD_32:
+    return expandAtomicRMW(MI, BB, SP::ADDrr);
+  case SP::ATOMIC_LOAD_ADD_64:
+    return expandAtomicRMW(MI, BB, SP::ADDXrr);
+  case SP::ATOMIC_LOAD_SUB_32:
+    return expandAtomicRMW(MI, BB, SP::SUBrr);
+  case SP::ATOMIC_LOAD_SUB_64:
+    return expandAtomicRMW(MI, BB, SP::SUBXrr);
+  case SP::ATOMIC_LOAD_AND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_AND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+  case SP::ATOMIC_LOAD_OR_32:
+    return expandAtomicRMW(MI, BB, SP::ORrr);
+  case SP::ATOMIC_LOAD_OR_64:
+    return expandAtomicRMW(MI, BB, SP::ORXrr);
+  case SP::ATOMIC_LOAD_XOR_32:
+    return expandAtomicRMW(MI, BB, SP::XORrr);
+  case SP::ATOMIC_LOAD_XOR_64:
+    return expandAtomicRMW(MI, BB, SP::XORXrr);
+  case SP::ATOMIC_LOAD_NAND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_NAND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+
+  case SP::ATOMIC_LOAD_MAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_MIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_UMAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
+  case SP::ATOMIC_LOAD_UMIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
   }
+}
 
-  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+MachineBasicBlock*
+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
+                                    MachineBasicBlock *BB,
+                                    unsigned BROpcode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
   // control-flow pattern.  The incoming instruction knows the destination vreg
@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   return BB;
 }
 
+MachineBasicBlock*
+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned Opcode,
+                                     unsigned CondCode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // MI is an atomic read-modify-write instruction of the form:
+  //
+  //   rd = atomicrmw<op> addr, rs2
+  //
+  // All three operands are registers.
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned AddrReg = MI->getOperand(1).getReg();
+  unsigned Rs2Reg  = MI->getOperand(2).getReg();
+
+  // SelectionDAG has already inserted memory barriers before and after MI, so
+  // we simply have to implement the operation in terms of compare-and-swap.
+  //
+  //   %val0 = load %addr
+  // loop:
+  //   %val = phi %val0, %dest
+  //   %upd = op %val, %rs2
+  //   %dest = cas %addr, %upd, %val
+  //   cmp %val, %dest
+  //   bne loop
+  // done:
+  //
+  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
+  const TargetRegisterClass *ValueRC =
+    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
+    .addReg(AddrReg).addImm(0);
+
+  // Split the basic block MBB before MI and insert the loop block in the hole.
+  MachineFunction::iterator MFI = MBB;
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  ++MFI;
+  MF->insert(MFI, LoopMBB);
+  MF->insert(MFI, DoneMBB);
+
+  // Move MI and following instructions to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Connect the CFG again.
+  MBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  // Build the loop block.
+  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
+  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
+    .addReg(Val0Reg).addMBB(MBB)
+    .addReg(DestReg).addMBB(LoopMBB);
+
+  if (CondCode) {
+    // This is one of the min/max operations. We need a CMPrr followed by a
+    // MOVXCC/MOVICC.
+    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
+  } else {
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg);
+  }
+
+  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
+      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
+    unsigned TmpReg = UpdReg;
+    UpdReg = MRI.createVirtualRegister(ValueRC);
+    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
+  }
+
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
+    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
+    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
 //===----------------------------------------------------------------------===//
 //                         Sparc Inline Assembly Support
 //===----------------------------------------------------------------------===//
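
Two cases in expandAtomicRMW above deviate from a single ALU op: the min/max
variants compute the update with CMPrr plus a conditional move, and nand
inverts the AND result with an extra XOR against -1 (which is why the nand
test checks for both "and" and "xor"). In C terms, a sketch of the update step
only (hypothetical helper names):

  #include <stdint.h>

  /* max: cmp %val, %rs2 followed by a move on "greater", i.e.
   * expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G). */
  static int32_t
  max_update_sketch(int32_t val, int32_t rs2)
  {
          return (val > rs2) ? val : rs2;
  }

  /* nand: and %val, %rs2, then xor with -1 to invert. */
  static uint32_t
  nand_update_sketch(uint32_t val, uint32_t rs2)
  {
          return (val & rs2) ^ (uint32_t)-1;    /* == ~(val & rs2) */
  }
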
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -165,6 +165,13 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N,
                                     SmallVectorImpl<SDValue>& Results,
                                     SelectionDAG &DAG) const;
+
+    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
+                                      unsigned BROpcode) const;
+    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
+                                       MachineBasicBlock *BB,
+                                       unsigned Opcode,
+                                       unsigned CondCode = 0) const;
   };
 } // end namespace llvm
 