patch-r275633-llvm-r223171-fix-vectorizer.diff revision 283015
1Pull in r223170 from upstream llvm trunk (by Michael Zolotukhin):
2
3  Apply loop-rotate to several vectorizer tests.
4
5  Such loops shouldn't be vectorized due to the loops' form.
6  After applying loop-rotate (+simplifycfg) the tests again start to check
7  what they are intended to check.
8
9Pull in r223171 from upstream llvm trunk (by Michael Zolotukhin):
10
11  PR21302. Vectorize only bottom-tested loops.
12
13  rdar://problem/18886083
14
15This fixes a bug in the llvm vectorizer, which could sometimes cause
16vectorized loops to perform an additional iteration, leading to possible
17buffer overruns.  Symptoms of this, which are usually segfaults, were
18first noticed when building gcc ports, here:
19
20https://lists.freebsd.org/pipermail/freebsd-ports/2014-September/095466.html
21https://lists.freebsd.org/pipermail/freebsd-toolchain/2014-September/001211.html
22
23Introduced here: http://svnweb.freebsd.org/changeset/base/275633
24
25Index: lib/Transforms/Vectorize/LoopVectorize.cpp
26===================================================================
27--- lib/Transforms/Vectorize/LoopVectorize.cpp
28+++ lib/Transforms/Vectorize/LoopVectorize.cpp
29@@ -2864,6 +2864,14 @@ bool LoopVectorizationLegality::canVectorize() {
30   if (!TheLoop->getExitingBlock())
31     return false;
32 
33+  // We only handle bottom-tested loops, i.e. loop in which the condition is
34+  // checked at the end of each iteration. With that we can assume that all
35+  // instructions in the loop are executed the same number of times.
36+  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
37+    DEBUG(dbgs() << "LV: loop control flow is not understood by vectorizer\n");
38+    return false;
39+  }
40+
41   // We need to have a loop header.
42   DEBUG(dbgs() << "LV: Found a loop: " <<
43         TheLoop->getHeader()->getName() << '\n');
44Index: test/Transforms/LoopVectorize/loop-form.ll
45===================================================================
46--- test/Transforms/LoopVectorize/loop-form.ll
47+++ test/Transforms/LoopVectorize/loop-form.ll
48@@ -0,0 +1,31 @@
49+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
50+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
51+
52+; Check that we vectorize only bottom-tested loops.
53+; This is a reduced testcase from PR21302.
54+;
55+; rdar://problem/18886083
56+
57+%struct.X = type { i32, i16 }
58+; CHECK-LABEL: @foo(
59+; CHECK-NOT: vector.body
60+
61+define void @foo(i32 %n) {
62+entry:
63+  br label %for.cond
64+
65+for.cond:
66+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
67+  %cmp = icmp slt i32 %i, %n
68+  br i1 %cmp, label %for.body, label %if.end
69+
70+for.body:
71+  %iprom = sext i32 %i to i64
72+  %b = getelementptr inbounds %struct.X* undef, i64 %iprom, i32 1
73+  store i16 0, i16* %b, align 4
74+  %inc = add nsw i32 %i, 1
75+  br label %for.cond
76+
77+if.end:
78+  ret void
79+}
80Index: test/Transforms/LoopVectorize/runtime-check-address-space.ll
81===================================================================
82--- test/Transforms/LoopVectorize/runtime-check-address-space.ll
83+++ test/Transforms/LoopVectorize/runtime-check-address-space.ll
84@@ -31,25 +31,23 @@ define void @foo(i32 addrspace(1)* %a, i32 addrspa
85 ; CHECK: ret
86 
87 entry:
88-  br label %for.cond
89+  %cmp1 = icmp slt i32 0, %n
90+  br i1 %cmp1, label %for.body, label %for.end
91 
92-for.cond:                                         ; preds = %for.body, %entry
93-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
94-  %cmp = icmp slt i32 %i.0, %n
95-  br i1 %cmp, label %for.body, label %for.end
96-
97-for.body:                                         ; preds = %for.cond
98-  %idxprom = sext i32 %i.0 to i64
99+for.body:                                         ; preds = %entry, %for.body
100+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
101+  %idxprom = sext i32 %i.02 to i64
102   %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
103   %0 = load i32 addrspace(1)* %arrayidx, align 4
104   %mul = mul nsw i32 %0, 3
105-  %idxprom1 = sext i32 %i.0 to i64
106+  %idxprom1 = sext i32 %i.02 to i64
107   %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
108   store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
109-  %inc = add nsw i32 %i.0, 1
110-  br label %for.cond
111+  %inc = add nsw i32 %i.02, 1
112+  %cmp = icmp slt i32 %inc, %n
113+  br i1 %cmp, label %for.body, label %for.end
114 
115-for.end:                                          ; preds = %for.cond
116+for.end:                                          ; preds = %for.body, %entry
117   ret void
118 }
119 
120@@ -60,25 +58,23 @@ define void @bar0(i32* %a, i32 addrspace(1)* %b, i
121 ; CHECK: ret
122 
123 entry:
124-  br label %for.cond
125+  %cmp1 = icmp slt i32 0, %n
126+  br i1 %cmp1, label %for.body, label %for.end
127 
128-for.cond:                                         ; preds = %for.body, %entry
129-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
130-  %cmp = icmp slt i32 %i.0, %n
131-  br i1 %cmp, label %for.body, label %for.end
132-
133-for.body:                                         ; preds = %for.cond
134-  %idxprom = sext i32 %i.0 to i64
135+for.body:                                         ; preds = %entry, %for.body
136+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
137+  %idxprom = sext i32 %i.02 to i64
138   %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
139   %0 = load i32 addrspace(1)* %arrayidx, align 4
140   %mul = mul nsw i32 %0, 3
141-  %idxprom1 = sext i32 %i.0 to i64
142+  %idxprom1 = sext i32 %i.02 to i64
143   %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
144   store i32 %mul, i32* %arrayidx2, align 4
145-  %inc = add nsw i32 %i.0, 1
146-  br label %for.cond
147+  %inc = add nsw i32 %i.02, 1
148+  %cmp = icmp slt i32 %inc, %n
149+  br i1 %cmp, label %for.body, label %for.end
150 
151-for.end:                                          ; preds = %for.cond
152+for.end:                                          ; preds = %for.body, %entry
153   ret void
154 }
155 
156@@ -89,25 +85,23 @@ define void @bar1(i32 addrspace(1)* %a, i32* %b, i
157 ; CHECK: ret
158 
159 entry:
160-  br label %for.cond
161+  %cmp1 = icmp slt i32 0, %n
162+  br i1 %cmp1, label %for.body, label %for.end
163 
164-for.cond:                                         ; preds = %for.body, %entry
165-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
166-  %cmp = icmp slt i32 %i.0, %n
167-  br i1 %cmp, label %for.body, label %for.end
168-
169-for.body:                                         ; preds = %for.cond
170-  %idxprom = sext i32 %i.0 to i64
171+for.body:                                         ; preds = %entry, %for.body
172+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
173+  %idxprom = sext i32 %i.02 to i64
174   %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
175   %0 = load i32* %arrayidx, align 4
176   %mul = mul nsw i32 %0, 3
177-  %idxprom1 = sext i32 %i.0 to i64
178+  %idxprom1 = sext i32 %i.02 to i64
179   %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
180   store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
181-  %inc = add nsw i32 %i.0, 1
182-  br label %for.cond
183+  %inc = add nsw i32 %i.02, 1
184+  %cmp = icmp slt i32 %inc, %n
185+  br i1 %cmp, label %for.body, label %for.end
186 
187-for.end:                                          ; preds = %for.cond
188+for.end:                                          ; preds = %for.body, %entry
189   ret void
190 }
191 
192@@ -119,25 +113,23 @@ define void @bar2(i32* noalias %a, i32 addrspace(1
193 ; CHECK: ret
194 
195 entry:
196-  br label %for.cond
197+  %cmp1 = icmp slt i32 0, %n
198+  br i1 %cmp1, label %for.body, label %for.end
199 
200-for.cond:                                         ; preds = %for.body, %entry
201-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
202-  %cmp = icmp slt i32 %i.0, %n
203-  br i1 %cmp, label %for.body, label %for.end
204-
205-for.body:                                         ; preds = %for.cond
206-  %idxprom = sext i32 %i.0 to i64
207+for.body:                                         ; preds = %entry, %for.body
208+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
209+  %idxprom = sext i32 %i.02 to i64
210   %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
211   %0 = load i32 addrspace(1)* %arrayidx, align 4
212   %mul = mul nsw i32 %0, 3
213-  %idxprom1 = sext i32 %i.0 to i64
214+  %idxprom1 = sext i32 %i.02 to i64
215   %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
216   store i32 %mul, i32* %arrayidx2, align 4
217-  %inc = add nsw i32 %i.0, 1
218-  br label %for.cond
219+  %inc = add nsw i32 %i.02, 1
220+  %cmp = icmp slt i32 %inc, %n
221+  br i1 %cmp, label %for.body, label %for.end
222 
223-for.end:                                          ; preds = %for.cond
224+for.end:                                          ; preds = %for.body, %entry
225   ret void
226 }
227 
228@@ -149,25 +141,23 @@ define void @arst0(i32* %b, i32 %n) #0 {
229 ; CHECK: ret
230 
231 entry:
232-  br label %for.cond
233+  %cmp1 = icmp slt i32 0, %n
234+  br i1 %cmp1, label %for.body, label %for.end
235 
236-for.cond:                                         ; preds = %for.body, %entry
237-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
238-  %cmp = icmp slt i32 %i.0, %n
239-  br i1 %cmp, label %for.body, label %for.end
240-
241-for.body:                                         ; preds = %for.cond
242-  %idxprom = sext i32 %i.0 to i64
243+for.body:                                         ; preds = %entry, %for.body
244+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
245+  %idxprom = sext i32 %i.02 to i64
246   %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
247   %0 = load i32* %arrayidx, align 4
248   %mul = mul nsw i32 %0, 3
249-  %idxprom1 = sext i32 %i.0 to i64
250+  %idxprom1 = sext i32 %i.02 to i64
251   %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
252   store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
253-  %inc = add nsw i32 %i.0, 1
254-  br label %for.cond
255+  %inc = add nsw i32 %i.02, 1
256+  %cmp = icmp slt i32 %inc, %n
257+  br i1 %cmp, label %for.body, label %for.end
258 
259-for.end:                                          ; preds = %for.cond
260+for.end:                                          ; preds = %for.body, %entry
261   ret void
262 }
263 
264@@ -180,25 +170,23 @@ define void @arst1(i32* %b, i32 %n) #0 {
265 ; CHECK: ret
266 
267 entry:
268-  br label %for.cond
269+  %cmp1 = icmp slt i32 0, %n
270+  br i1 %cmp1, label %for.body, label %for.end
271 
272-for.cond:                                         ; preds = %for.body, %entry
273-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
274-  %cmp = icmp slt i32 %i.0, %n
275-  br i1 %cmp, label %for.body, label %for.end
276-
277-for.body:                                         ; preds = %for.cond
278-  %idxprom = sext i32 %i.0 to i64
279+for.body:                                         ; preds = %entry, %for.body
280+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
281+  %idxprom = sext i32 %i.02 to i64
282   %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
283   %0 = load i32 addrspace(1)* %arrayidx, align 4
284   %mul = mul nsw i32 %0, 3
285-  %idxprom1 = sext i32 %i.0 to i64
286+  %idxprom1 = sext i32 %i.02 to i64
287   %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
288   store i32 %mul, i32* %arrayidx2, align 4
289-  %inc = add nsw i32 %i.0, 1
290-  br label %for.cond
291+  %inc = add nsw i32 %i.02, 1
292+  %cmp = icmp slt i32 %inc, %n
293+  br i1 %cmp, label %for.body, label %for.end
294 
295-for.end:                                          ; preds = %for.cond
296+for.end:                                          ; preds = %for.body, %entry
297   ret void
298 }
299 
300@@ -210,25 +198,23 @@ define void @aoeu(i32 %n) #0 {
301 ; CHECK: ret
302 
303 entry:
304-  br label %for.cond
305+  %cmp1 = icmp slt i32 0, %n
306+  br i1 %cmp1, label %for.body, label %for.end
307 
308-for.cond:                                         ; preds = %for.body, %entry
309-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
310-  %cmp = icmp slt i32 %i.0, %n
311-  br i1 %cmp, label %for.body, label %for.end
312-
313-for.body:                                         ; preds = %for.cond
314-  %idxprom = sext i32 %i.0 to i64
315+for.body:                                         ; preds = %entry, %for.body
316+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
317+  %idxprom = sext i32 %i.02 to i64
318   %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
319   %0 = load i32 addrspace(2)* %arrayidx, align 4
320   %mul = mul nsw i32 %0, 3
321-  %idxprom1 = sext i32 %i.0 to i64
322+  %idxprom1 = sext i32 %i.02 to i64
323   %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
324   store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
325-  %inc = add nsw i32 %i.0, 1
326-  br label %for.cond
327+  %inc = add nsw i32 %i.02, 1
328+  %cmp = icmp slt i32 %inc, %n
329+  br i1 %cmp, label %for.body, label %for.end
330 
331-for.end:                                          ; preds = %for.cond
332+for.end:                                          ; preds = %for.body, %entry
333   ret void
334 }
335 
336Index: test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
337===================================================================
338--- test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
339+++ test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
340@@ -8,26 +8,24 @@ define void @add_ints_1_1_1(i32 addrspace(1)* %a,
341 ; CHECK-LABEL: @add_ints_1_1_1(
342 ; CHECK: <4 x i32>
343 ; CHECK: ret
344+
345 entry:
346-  br label %for.cond
347+  br label %for.body
348 
349-for.cond:                                         ; preds = %for.body, %entry
350-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
351-  %cmp = icmp ult i64 %i.0, 200
352-  br i1 %cmp, label %for.body, label %for.end
353-
354-for.body:                                         ; preds = %for.cond
355-  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
356+for.body:                                         ; preds = %entry, %for.body
357+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
358+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
359   %0 = load i32 addrspace(1)* %arrayidx, align 4
360-  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
361+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
362   %1 = load i32 addrspace(1)* %arrayidx1, align 4
363   %add = add nsw i32 %0, %1
364-  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
365+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
366   store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
367-  %inc = add i64 %i.0, 1
368-  br label %for.cond
369+  %inc = add i64 %i.01, 1
370+  %cmp = icmp ult i64 %inc, 200
371+  br i1 %cmp, label %for.body, label %for.end
372 
373-for.end:                                          ; preds = %for.cond
374+for.end:                                          ; preds = %for.body
375   ret void
376 }
377 
378@@ -35,26 +33,24 @@ define void @add_ints_as_1_0_0(i32 addrspace(1)* %
379 ; CHECK-LABEL: @add_ints_as_1_0_0(
380 ; CHECK-NOT: <4 x i32>
381 ; CHECK: ret
382+
383 entry:
384-  br label %for.cond
385+  br label %for.body
386 
387-for.cond:                                         ; preds = %for.body, %entry
388-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
389-  %cmp = icmp ult i64 %i.0, 200
390-  br i1 %cmp, label %for.body, label %for.end
391-
392-for.body:                                         ; preds = %for.cond
393-  %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
394+for.body:                                         ; preds = %entry, %for.body
395+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
396+  %arrayidx = getelementptr inbounds i32* %b, i64 %i.01
397   %0 = load i32* %arrayidx, align 4
398-  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
399+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
400   %1 = load i32* %arrayidx1, align 4
401   %add = add nsw i32 %0, %1
402-  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
403+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
404   store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
405-  %inc = add i64 %i.0, 1
406-  br label %for.cond
407+  %inc = add i64 %i.01, 1
408+  %cmp = icmp ult i64 %inc, 200
409+  br i1 %cmp, label %for.body, label %for.end
410 
411-for.end:                                          ; preds = %for.cond
412+for.end:                                          ; preds = %for.body
413   ret void
414 }
415 
416@@ -62,26 +58,24 @@ define void @add_ints_as_0_1_0(i32* %a, i32 addrsp
417 ; CHECK-LABEL: @add_ints_as_0_1_0(
418 ; CHECK-NOT: <4 x i32>
419 ; CHECK: ret
420+
421 entry:
422-  br label %for.cond
423+  br label %for.body
424 
425-for.cond:                                         ; preds = %for.body, %entry
426-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
427-  %cmp = icmp ult i64 %i.0, 200
428-  br i1 %cmp, label %for.body, label %for.end
429-
430-for.body:                                         ; preds = %for.cond
431-  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
432+for.body:                                         ; preds = %entry, %for.body
433+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
434+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
435   %0 = load i32 addrspace(1)* %arrayidx, align 4
436-  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
437+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
438   %1 = load i32* %arrayidx1, align 4
439   %add = add nsw i32 %0, %1
440-  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
441+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
442   store i32 %add, i32* %arrayidx2, align 4
443-  %inc = add i64 %i.0, 1
444-  br label %for.cond
445+  %inc = add i64 %i.01, 1
446+  %cmp = icmp ult i64 %inc, 200
447+  br i1 %cmp, label %for.body, label %for.end
448 
449-for.end:                                          ; preds = %for.cond
450+for.end:                                          ; preds = %for.body
451   ret void
452 }
453 
454@@ -89,26 +83,24 @@ define void @add_ints_as_0_1_1(i32* %a, i32 addrsp
455 ; CHECK-LABEL: @add_ints_as_0_1_1(
456 ; CHECK-NOT: <4 x i32>
457 ; CHECK: ret
458+
459 entry:
460-  br label %for.cond
461+  br label %for.body
462 
463-for.cond:                                         ; preds = %for.body, %entry
464-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
465-  %cmp = icmp ult i64 %i.0, 200
466-  br i1 %cmp, label %for.body, label %for.end
467-
468-for.body:                                         ; preds = %for.cond
469-  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
470+for.body:                                         ; preds = %entry, %for.body
471+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
472+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
473   %0 = load i32 addrspace(1)* %arrayidx, align 4
474-  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
475+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
476   %1 = load i32 addrspace(1)* %arrayidx1, align 4
477   %add = add nsw i32 %0, %1
478-  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
479+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
480   store i32 %add, i32* %arrayidx2, align 4
481-  %inc = add i64 %i.0, 1
482-  br label %for.cond
483+  %inc = add i64 %i.01, 1
484+  %cmp = icmp ult i64 %inc, 200
485+  br i1 %cmp, label %for.body, label %for.end
486 
487-for.end:                                          ; preds = %for.cond
488+for.end:                                          ; preds = %for.body
489   ret void
490 }
491 
492@@ -116,26 +108,24 @@ define void @add_ints_as_0_1_2(i32* %a, i32 addrsp
493 ; CHECK-LABEL: @add_ints_as_0_1_2(
494 ; CHECK-NOT: <4 x i32>
495 ; CHECK: ret
496+
497 entry:
498-  br label %for.cond
499+  br label %for.body
500 
501-for.cond:                                         ; preds = %for.body, %entry
502-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
503-  %cmp = icmp ult i64 %i.0, 200
504-  br i1 %cmp, label %for.body, label %for.end
505-
506-for.body:                                         ; preds = %for.cond
507-  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
508+for.body:                                         ; preds = %entry, %for.body
509+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
510+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
511   %0 = load i32 addrspace(1)* %arrayidx, align 4
512-  %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
513+  %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.01
514   %1 = load i32 addrspace(2)* %arrayidx1, align 4
515   %add = add nsw i32 %0, %1
516-  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
517+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
518   store i32 %add, i32* %arrayidx2, align 4
519-  %inc = add i64 %i.0, 1
520-  br label %for.cond
521+  %inc = add i64 %i.01, 1
522+  %cmp = icmp ult i64 %inc, 200
523+  br i1 %cmp, label %for.body, label %for.end
524 
525-for.end:                                          ; preds = %for.cond
526+for.end:                                          ; preds = %for.body
527   ret void
528 }
529 
530