1/* Vectorizer Specific Loop Manipulations
2   Copyright (C) 2003-2015 Free Software Foundation, Inc.
3   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4   and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "dumpfile.h"
26#include "tm.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "predict.h"
39#include "hard-reg-set.h"
40#include "input.h"
41#include "function.h"
42#include "dominance.h"
43#include "cfg.h"
44#include "cfganal.h"
45#include "basic-block.h"
46#include "gimple-pretty-print.h"
47#include "tree-ssa-alias.h"
48#include "internal-fn.h"
49#include "gimple-expr.h"
50#include "is-a.h"
51#include "gimple.h"
52#include "gimplify.h"
53#include "gimple-iterator.h"
54#include "gimplify-me.h"
55#include "gimple-ssa.h"
56#include "tree-cfg.h"
57#include "tree-phinodes.h"
58#include "ssa-iterators.h"
59#include "stringpool.h"
60#include "tree-ssanames.h"
61#include "tree-ssa-loop-manip.h"
62#include "tree-into-ssa.h"
63#include "tree-ssa.h"
64#include "tree-pass.h"
65#include "cfgloop.h"
66#include "diagnostic-core.h"
67#include "tree-scalar-evolution.h"
68#include "tree-vectorizer.h"
69#include "langhooks.h"
70
71/*************************************************************************
72  Simple Loop Peeling Utilities
73
74  Utilities to support loop peeling for vectorization purposes.
75 *************************************************************************/
76
77
78/* Renames the use *OP_P.  */
79
80static void
81rename_use_op (use_operand_p op_p)
82{
83  tree new_name;
84
85  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
86    return;
87
88  new_name = get_current_def (USE_FROM_PTR (op_p));
89
90  /* Something defined outside of the loop.  */
91  if (!new_name)
92    return;
93
94  /* An ordinary ssa name defined in the loop.  */
95
96  SET_USE (op_p, new_name);
97}
98
99
100/* Renames the variables in basic block BB.  */
101
102static void
103rename_variables_in_bb (basic_block bb)
104{
105  gimple stmt;
106  use_operand_p use_p;
107  ssa_op_iter iter;
108  edge e;
109  edge_iterator ei;
110  struct loop *loop = bb->loop_father;
111
112  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
113       gsi_next (&gsi))
114    {
115      stmt = gsi_stmt (gsi);
116      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
117	rename_use_op (use_p);
118    }
119
120  FOR_EACH_EDGE (e, ei, bb->preds)
121    {
122      if (!flow_bb_inside_loop_p (loop, e->src))
123	continue;
124      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125	   gsi_next (&gsi))
126        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
127    }
128}
129
130
/* Record of one pending debug-stmt adjustment: debug uses of FROM
   that are dominated by BB (but not by FROM's own definition) are to
   be replaced by TO.  See adjust_debug_stmts_now.  */
typedef struct
{
  tree from, to;
  basic_block bb;
} adjust_info;

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
144
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  If AI->to is NULL_TREE the
   debug binding is reset instead.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  /* Dominance info is needed below to decide which uses to rewrite.  */
  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      /* Non-debug uses are left alone; only debug bindings are
	 repaired here.  */
      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      /* Rewrite the use only where the PHI block dominates it but the
	 original definition's block does not, i.e. where the
	 loop-closed name is the reaching definition.  */
      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      /* No replacement is available: drop the bound value so
		 the debug stmt does not hold a stale reference.  */
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}
191
192/* Adjust debug stmts as scheduled before.  */
193
194static void
195adjust_vec_debug_stmts (void)
196{
197  if (!MAY_HAVE_DEBUG_STMTS)
198    return;
199
200  gcc_assert (adjust_vec.exists ());
201
202  while (!adjust_vec.is_empty ())
203    {
204      adjust_debug_stmts_now (&adjust_vec.last ());
205      adjust_vec.pop ();
206    }
207
208  adjust_vec.release ();
209}
210
211/* Adjust any debug stmts that referenced FROM values to use the
212   loop-closed TO, if the references are dominated by BB and not by
213   the definition of FROM.  If adjust_vec is non-NULL, adjustments
214   will be postponed until adjust_vec_debug_stmts is called.  */
215
216static void
217adjust_debug_stmts (tree from, tree to, basic_block bb)
218{
219  adjust_info ai;
220
221  if (MAY_HAVE_DEBUG_STMTS
222      && TREE_CODE (from) == SSA_NAME
223      && ! SSA_NAME_IS_DEFAULT_DEF (from)
224      && ! virtual_operand_p (from))
225    {
226      ai.from = from;
227      ai.to = to;
228      ai.bb = bb;
229
230      if (adjust_vec.exists ())
231	adjust_vec.safe_push (ai);
232      else
233	adjust_debug_stmts_now (&ai);
234    }
235}
236
237/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
238   to adjust any debug stmts that referenced the old phi arg,
239   presumably non-loop-closed references left over from other
240   transformations.  */
241
242static void
243adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
244{
245  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
246
247  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
248
249  if (MAY_HAVE_DEBUG_STMTS)
250    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
251			gimple_bb (update_phi));
252}
253
254
255/* Update PHI nodes for a guard of the LOOP.
256
257   Input:
258   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
259        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
260        originates from the guard-bb, skips LOOP and reaches the (unique) exit
261        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
262        We denote this bb NEW_MERGE_BB because before the guard code was added
263        it had a single predecessor (the LOOP header), and now it became a merge
264        point of two paths - the path that ends with the LOOP exit-edge, and
265        the path that ends with GUARD_EDGE.
266   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
267        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
268
269   ===> The CFG before the guard-code was added:
270        LOOP_header_bb:
271          loop_body
272          if (exit_loop) goto update_bb
273          else           goto LOOP_header_bb
274        update_bb:
275
276   ==> The CFG after the guard-code was added:
277        guard_bb:
278          if (LOOP_guard_condition) goto new_merge_bb
279          else                      goto LOOP_header_bb
280        LOOP_header_bb:
281          loop_body
282          if (exit_loop_condition) goto new_merge_bb
283          else                     goto LOOP_header_bb
284        new_merge_bb:
285          goto update_bb
286        update_bb:
287
288   ==> The CFG after this function:
289        guard_bb:
290          if (LOOP_guard_condition) goto new_merge_bb
291          else                      goto LOOP_header_bb
292        LOOP_header_bb:
293          loop_body
294          if (exit_loop_condition) goto new_exit_bb
295          else                     goto LOOP_header_bb
296        new_exit_bb:
297        new_merge_bb:
298          goto update_bb
299        update_bb:
300
301   This function:
302   1. creates and updates the relevant phi nodes to account for the new
303      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
304      1.1. Create phi nodes at NEW_MERGE_BB.
305      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
306           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
307   2. preserves loop-closed-ssa-form by creating the required phi nodes
308      at the exit of LOOP (i.e, in NEW_EXIT_BB).
309
310   There are two flavors to this function:
311
312   slpeel_update_phi_nodes_for_guard1:
313     Here the guard controls whether we enter or skip LOOP, where LOOP is a
314     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
315     for variables that have phis in the loop header.
316
317   slpeel_update_phi_nodes_for_guard2:
318     Here the guard controls whether we enter or skip LOOP, where LOOP is an
319     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
320     for variables that have phis in the loop exit.
321
322   I.E., the overall structure is:
323
324        loop1_preheader_bb:
325                guard1 (goto loop1/merge1_bb)
326        loop1
327        loop1_exit_bb:
328                guard2 (goto merge1_bb/merge2_bb)
329        merge1_bb
330        loop2
331        loop2_exit_bb
332        merge2_bb
333        next_bb
334
335   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
336   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
337   that have phis in loop1->header).
338
339   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
340   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
341   that have phis in next_bb). It also adds some of these phis to
342   loop1_exit_bb.
343
344   slpeel_update_phi_nodes_for_guard1 is always called before
345   slpeel_update_phi_nodes_for_guard2. They are both needed in order
346   to create correct data-flow and loop-closed-ssa-form.
347
348   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
349   that change between iterations of a loop (and therefore have a phi-node
350   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
351   phis for variables that are used out of the loop (and therefore have
352   loop-closed exit phis). Some variables may be both updated between
353   iterations and used after the loop. This is why in loop1_exit_bb we
354   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
355   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
356
357   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
358     an original loop. i.e., we have:
359
360           orig_loop
361           guard_bb (goto LOOP/new_merge)
362           new_loop <-- LOOP
363           new_exit
364           new_merge
365           next_bb
366
367     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
368     have:
369
370           new_loop
371           guard_bb (goto LOOP/new_merge)
372           orig_loop <-- LOOP
373           new_exit
374           new_merge
375           next_bb
376
     The SSA names defined in the original loop have a current
     reaching definition that records the corresponding new
     ssa-name used in the new duplicated loop copy.
380  */
381
/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
            information.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
LOOP->  loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e - for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  gphi_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  /* Walk the loop-header phis and the phis of UPDATE_BB in lock-step;
     they are expected to correspond pairwise.  */
  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      source_location loop_locus, guard_locus;
      tree new_res;
      orig_phi = gsi_orig.phi ();
      update_phi = gsi_update.phi ();

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
      /* The latch argument of the header phi is the value live on the
	 loop's exit; the preheader argument is the value live when the
	 guard skips the loop.  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
						      EDGE_SUCC (loop->latch,
								 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
      guard_locus
	= gimple_phi_arg_location_from_edge (orig_phi,
					     loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* Virtual operands do not need loop-closed exit phis.  */
      if (virtual_operand_p (PHI_RESULT (orig_phi)))
	continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
                name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
                set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for. If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
        current_new_name = loop_arg;
      else
        {
          current_new_name = get_current_def (loop_arg);
	  /* current_def is not available only if the variable does not
	     change inside the loop, in which case we also don't care
	     about recording a current_def for it because we won't be
	     trying to create loop-exit-phis for it.  */
	  if (!current_new_name)
	    continue;
        }
      tree new_name = get_current_def (current_new_name);
      /* Because of peeled_chrec optimization it is possible that we have
	 set this earlier.  Verify the PHI has the same value.  */
      if (new_name)
	{
	  gimple phi = SSA_NAME_DEF_STMT (new_name);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI
		      && gimple_bb (phi) == *new_exit_bb
		      && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
			  == loop_arg));
	  continue;
	}

      set_current_def (current_new_name, PHI_RESULT (new_phi));
    }
}
520
521
/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
LOOP->  loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e - for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gphi *orig_phi, *new_phi;
  gphi *update_phi, *update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gphi_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      update_phi = gsi.phi ();
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
	  /* Some variables have both loop-entry-phis and loop-exit-phis.
	     Such variables were given yet newer names by phis placed in
	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
	     new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      /* Which name flows from the loop exit and which from the guard
	 depends on whether LOOP is the copy or the original.  */
      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
				  PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
	 GUARD_BB, i.e. names for which
	 slpeel_update_phi_nodes_for_guard1 had not already created a
	 phi node. This is the case for names that are used outside
	 the loop (and therefore need an exit phi) but are not updated
	 across loop iterations (and therefore don't have a
	 loop-header-phi).

	 slpeel_update_phi_nodes_for_guard1 is responsible for
	 creating loop-exit phis in GUARD_BB for names that have a
	 loop-header-phi.  When such a phi is created we also record
	 the new name in its current definition.  If this new name
	 exists, then guard_arg was set to this new name (see 1.2
	 above).  Therefore, if guard_arg is not this new name, this
	 is an indication that an exit-phi in GUARD_BB was not yet
	 created, so we take care of it here.  */
      if (guard_arg == new_name2)
	continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      new_phi = create_phi_node (new_res, guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
		   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                                                                == guard_arg);
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
				  PHI_RESULT (new_phi));
    }
}
677
678
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.
   The original exit condition is removed and replaced by a test of
   the new IV against NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  /* Create the counting IV: init 0, step 1, at the standard position.  */
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  /* Gimplify the IV value and NITERS so they are valid condition
     operands at the position of the old exit test.  */
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* If the exit edge is taken when the condition is true, exit once
     the IV reaches NITERS (>=); otherwise the condition must be true
     to stay in the loop (<).  */
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  /* Release the vectorizer bookkeeping attached to the old test.  */
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
  loop->nb_iterations = niters;
}
734
735/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
736   For all PHI arguments in FROM->dest and TO->dest from those
737   edges ensure that TO->dest PHI arguments have current_def
738   to that in from.  */
739
740static void
741slpeel_duplicate_current_defs_from_edges (edge from, edge to)
742{
743  gimple_stmt_iterator gsi_from, gsi_to;
744
745  for (gsi_from = gsi_start_phis (from->dest),
746       gsi_to = gsi_start_phis (to->dest);
747       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
748       gsi_next (&gsi_from), gsi_next (&gsi_to))
749    {
750      gimple from_phi = gsi_stmt (gsi_from);
751      gimple to_phi = gsi_stmt (gsi_to);
752      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
753      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
754      if (TREE_CODE (from_arg) == SSA_NAME
755	  && TREE_CODE (to_arg) == SSA_NAME
756	  && get_current_def (to_arg) == NULL_TREE)
757	set_current_def (to_arg, get_current_def (from_arg));
758    }
759}
760
761
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  Returns the new loop, or NULL when E is
   neither the entry nor the exit edge, or when the body cannot be
   duplicated.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;

  exit = single_exit (loop);
  at_exit = (e == exit);
  /* Only peeling at the single exit or at the preheader is supported.  */
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  /* Remember whether the exit destination was immediately dominated by
     the loop header, so dominators can be repaired after the copy.  */
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[scalar_loop->num_nodes] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[scalar_loop->num_nodes];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
							   0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
	{
	  gphi_iterator gsi;
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  /* The copied exit now also reaches EXIT_DEST; give its PHIs
	     arguments for the new edge, taken from LOOP's exit edge E.  */
	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
	    }
	}
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
	{
	  /* Remove the non-necessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  /* Work on LOOP's preheader instead from here on.  */
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);
	}

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      /* Chain the copy's exit into LOOP's preheader.  */
      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);
    }

  /* Rewrite the copied blocks to use the current definitions recorded
     for the original SSA names.  */
  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i]);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  free (new_bbs);
  free (bbs);

#ifdef ENABLE_CHECKING
  verify_dominators (CDI_DOMINATORS);
#endif

  return new_loop;
}
935
936
937/* Given the condition statement COND, put it as the last statement
938   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
939   Assumes that this is the single exit of the guarded loop.
940   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
941
942static edge
943slpeel_add_loop_guard (basic_block guard_bb, tree cond,
944		       gimple_seq cond_expr_stmt_list,
945		       basic_block exit_bb, basic_block dom_bb,
946		       int probability)
947{
948  gimple_stmt_iterator gsi;
949  edge new_e, enter_e;
950  gcond *cond_stmt;
951  gimple_seq gimplify_stmt_list = NULL;
952
953  enter_e = EDGE_SUCC (guard_bb, 0);
954  enter_e->flags &= ~EDGE_FALLTHRU;
955  enter_e->flags |= EDGE_FALSE_VALUE;
956  gsi = gsi_last_bb (guard_bb);
957
958  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
959				 NULL_TREE);
960  if (gimplify_stmt_list)
961    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
962  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
963  if (cond_expr_stmt_list)
964    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
965
966  gsi = gsi_last_bb (guard_bb);
967  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
968
969  /* Add new edge to connect guard block to the merge/loop-exit block.  */
970  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
971
972  new_e->count = guard_bb->count;
973  new_e->probability = probability;
974  new_e->count = apply_probability (enter_e->count, probability);
975  enter_e->count -= new_e->count;
976  enter_e->probability = inverse_probability (probability);
977  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
978  return new_e;
979}
980
981
982/* This function verifies that the following restrictions apply to LOOP:
983   (1) it is innermost
984   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
985   (3) it is single entry, single exit
986   (4) its exit condition is the last stmt in the header
987   (5) E is the entry/exit edge of LOOP.
988 */
989
990bool
991slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
992{
993  edge exit_e = single_exit (loop);
994  edge entry_e = loop_preheader_edge (loop);
995  gcond *orig_cond = get_loop_exit_condition (loop);
996  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
997
998  if (loop->inner
999      /* All loops have an outer scope; the only case loop->outer is NULL is for
1000         the function itself.  */
1001      || !loop_outer (loop)
1002      || loop->num_nodes != 2
1003      || !empty_block_p (loop->latch)
1004      || !single_exit (loop)
1005      /* Verify that new loop exit condition can be trivially modified.  */
1006      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1007      || (e != exit_e && e != entry_e))
1008    return false;
1009
1010  return true;
1011}
1012
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP must be guarded
   so that its exit block selects between SECOND_LOOP's preheader and a
   block following SECOND_LOOP.  */

static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block exit1_bb = single_exit (first_loop)->dest;
  basic_block entry2_bb = loop_preheader_edge (second_loop)->src;
  basic_block entry1_bb = loop_preheader_edge (first_loop)->src;

  /* The guard controlling whether second_loop runs or is skipped sits in
     first_loop's exit block, which therefore must have exactly two
     successors: second_loop's preheader and a block after second_loop.  */
  gcc_assert (EDGE_COUNT (exit1_bb->succs) == 2);

  /* 1. One successor of first_loop's exit must be second_loop's
	preheader, whose two predecessors are first_loop's exit block and
	the block preceding first_loop — in either order.  */
  gcc_assert (EDGE_COUNT (entry2_bb->preds) == 2
	      && ((EDGE_PRED (entry2_bb, 0)->src == exit1_bb
		   && EDGE_PRED (entry2_bb, 1)->src == entry1_bb)
		  || (EDGE_PRED (entry2_bb, 1)->src == exit1_bb
		      && EDGE_PRED (entry2_bb, 0)->src == entry1_bb)));

  /* 2. The other successor of first_loop's exit should lie after
     second_loop.  */
  /* TODO */
}
#endif
1046
1047/* If the run time cost model check determines that vectorization is
1048   not profitable and hence scalar loop should be generated then set
1049   FIRST_NITERS to prologue peeled iterations. This will allow all the
1050   iterations to be executed in the prologue peeled scalar loop.  */
1051
static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree *first_niters,
			 struct loop *loop,
			 unsigned int th,
			 int probability)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  edge e_true, e_false, e_fallthru;
  gcond *cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the single entry edge twice, creating COND_BB (will hold the
     run-time cost-model check) followed by THEN_BB (will compute the
     adjusted iteration count when the check fires).  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge (e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge (e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* The false edge of COND_BB bypasses THEN_BB and goes straight to the
     block before the first loop.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  /* The pre-existing COND_BB -> THEN_BB edge becomes the true edge.  */
  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  /* Distribute profile counts and frequencies according to
     PROBABILITY.  */
  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  /* Build and emit the check "scalar_loop_iters <= TH" at the end of
     COND_BB.  */
  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
					   NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* In THEN_BB, materialize the full (unchanged) scalar iteration
     count.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  /* Merge the two alternatives with a PHI node: the full scalar count
     when the cost check fired, the caller's FIRST_NITERS otherwise.
     The PHI result becomes the new FIRST_NITERS.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
}
1123
1124/* Function slpeel_tree_peel_loop_to_edge.
1125
1126   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1127   that is placed on the entry (exit) edge E of LOOP. After this transformation
1128   we have two loops one after the other - first-loop iterates FIRST_NITERS
1129   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1130   If the cost model indicates that it is profitable to emit a scalar
1131   loop instead of the vector one, then the prolog (epilog) loop will iterate
1132   for the entire unchanged scalar iterations of the loop.
1133
1134   Input:
1135   - LOOP: the loop to be peeled.
1136   - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1137	should be copied.
1138   - E: the exit or entry edge of LOOP.
1139        If it is the entry edge, we peel the first iterations of LOOP. In this
1140        case first-loop is LOOP, and second-loop is the newly created loop.
1141        If it is the exit edge, we peel the last iterations of LOOP. In this
1142        case, first-loop is the newly created loop, and second-loop is LOOP.
1143   - NITERS: the number of iterations that LOOP iterates.
1144   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT:  specifies whether this function is responsible
1146        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1147        is false, the caller of this function may want to take care of this
1148        (this can be useful if we don't want new stmts added to first-loop).
1149   - TH: cost model profitability threshold of iterations for vectorization.
1150   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1151                          during versioning and hence needs to occur during
1152			  prologue generation or whether cost model check
1153			  has not occurred during prologue generation and hence
1154			  needs to occur during epilogue generation.
1155   - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1156   - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1157
1158
1159   Output:
1160   The function returns a pointer to the new loop-copy, or NULL if it failed
1161   to perform the transformation.
1162
1163   The function generates two if-then-else guards: one before the first loop,
1164   and the other before the second loop:
1165   The first guard is:
1166     if (FIRST_NITERS == 0) then skip the first loop,
1167     and go directly to the second loop.
1168   The second guard is:
1169     if (FIRST_NITERS == NITERS) then skip the second loop.
1170
1171   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1172   then the generated condition is combined with COND_EXPR and the
1173   statements in COND_EXPR_STMT_LIST are emitted together with it.
1174
1175   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1176   FORNOW the resulting code will not be in loop-closed-ssa form.
1177*/
1178
static struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
			       edge e, tree *first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability,
			       tree cond_expr, gimple_seq cond_expr_stmt_list,
			       int bound1, int bound2)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);
  source_location loop_loc;
  /* There are many aspects to how likely the first loop is going to be executed.
     Without histogram we can't really do good job.  Simply set it to
     2/3, so the first loop is not reordered to the end of function and
     the hot path through stays short.  */
  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int probability_of_second_loop;

  /* Only simple loops can be peeled (innermost, two blocks, single
     entry/exit; see slpeel_can_duplicate_loop_p).  */
  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We might have a queued need to update virtual SSA form.  As we
     delete the update SSA machinery below after doing a regular
     incremental SSA update during loop copying make sure we don't
     lose that fact.
     ???  Needing to update virtual SSA form by renaming is unfortunate
     but not all of the vectorizer code inserting new loads / stores
     properly assigns virtual operands to those statements.  */
  update_ssa (TODO_update_ssa_only_virtuals);

  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
     in the exit bb and rename all the uses after the loop.  This simplifies
     the *guard[12] routines, which assume loop closed SSA form for all PHIs
     (but normally loop closed SSA form doesn't require virtual PHIs to be
     in the same form).  Doing this early simplifies the checking what
     uses should be renamed.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
	gphi *phi = gsi.phi ();
	/* Look for an existing virtual PHI in the exit block.  */
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	  {
	    /* None found: create one fed by the latch value and redirect
	       all uses outside the loop to the new result.  */
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi && gimple_bb (stmt) != loop->header)
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	  }
	break;
      }

  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
							   e)))
    {
      loop_loc = find_loop_location (loop);
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                       "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  /* Prepare the vector used by adjust_phi_and_debug_stmts to defer
     debug-stmt fixups until the CFG is final.  */
  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
	 generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for prologue generation
	 and the cost model check needs to be done at run
	 time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for epilogue generation
	 and the cost model check needs to be done at run
	 time.  This check is combined with any pre-existing
	 check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:
  */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  /* Loop copying inserted a forwarder block for us here.  */
  bb_before_second_loop = single_exit (first_loop)->dest;

  probability_of_second_loop = (inverse_probability (first_guard_probability)
			        + combine_probabilities (second_guard_probability,
                                                         first_guard_probability));
  /* Theoretically preheader edge of first loop and exit edge should have
     same frequencies.  Loop exit probabilities are however easy to get wrong.
     It is safer to copy value from original loop entry.  */
  bb_before_second_loop->frequency
     = combine_probabilities (bb_before_first_loop->frequency,
                              probability_of_second_loop);
  bb_before_second_loop->count
     = apply_probability (bb_before_first_loop->count,
			  probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
     = bb_before_second_loop->count;

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
      tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
      /* The epilogue is entered when fewer than a full vector's worth of
	 iterations remain (one more when peeling for gaps), or when the
	 run-time profitability threshold is not met.  */
      unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	limit = limit + 1;
      if (check_profitability
	  && th > limit)
	limit = th;
      pre_condition =
	fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
		     build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
      if (cond_expr)
	{
	  /* Combine with the caller-supplied condition to avoid
	     versioning (case 2.c above).  */
	  pre_condition =
	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
			 pre_condition,
			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
				      cond_expr));
	}
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th, first_guard_probability);

      /* Skip the prologue entirely when it would run zero times.  */
      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
				  cond_expr_stmt_list,
                                  bb_before_second_loop, bb_before_first_loop,
				  inverse_probability (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
		      check_profitability && (int)th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  /* Skip the second loop when the first loop consumed all NITERS
     iterations.  */
  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop,
				  inverse_probability (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                     second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
}
1492
1493/* Function vect_get_loop_location.
1494
1495   Extract the location of the loop in the source code.
1496   If the loop is not well formed for vectorization, an estimated
1497   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */
1499
1500source_location
1501find_loop_location (struct loop *loop)
1502{
1503  gimple stmt = NULL;
1504  basic_block bb;
1505  gimple_stmt_iterator si;
1506
1507  if (!loop)
1508    return UNKNOWN_LOCATION;
1509
1510  stmt = get_loop_exit_condition (loop);
1511
1512  if (stmt
1513      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1514    return gimple_location (stmt);
1515
1516  /* If we got here the loop is probably not "well formed",
1517     try to estimate the loop location */
1518
1519  if (!loop->header)
1520    return UNKNOWN_LOCATION;
1521
1522  bb = loop->header;
1523
1524  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1525    {
1526      stmt = gsi_stmt (si);
1527      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1528        return gimple_location (stmt);
1529    }
1530
1531  return UNKNOWN_LOCATION;
1532}
1533
1534
1535/* Function vect_can_advance_ivs_p
1536
1537   In case the number of iterations that LOOP iterates is unknown at compile
1538   time, an epilog loop will be generated, and the loop induction variables
1539   (IVs) will be "advanced" to the value they are supposed to take just before
1540   the epilog loop.  Here we check that the access function of the loop IVs
1541   and the expression that represents the loop bound are simple enough.
1542   These restrictions will be relaxed in the future.  */
1543
1544bool
1545vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1546{
1547  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1548  basic_block bb = loop->header;
1549  gimple phi;
1550  gphi_iterator gsi;
1551
1552  /* Analyze phi functions of the loop header.  */
1553
1554  if (dump_enabled_p ())
1555    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1556  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1557    {
1558      tree evolution_part;
1559
1560      phi = gsi.phi ();
1561      if (dump_enabled_p ())
1562	{
1563          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1564          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1565          dump_printf (MSG_NOTE, "\n");
1566	}
1567
1568      /* Skip virtual phi's. The data dependences that are associated with
1569         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1570
1571      if (virtual_operand_p (PHI_RESULT (phi)))
1572	{
1573	  if (dump_enabled_p ())
1574	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1575                             "virtual phi. skip.\n");
1576	  continue;
1577	}
1578
1579      /* Skip reduction phis.  */
1580
1581      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1582        {
1583          if (dump_enabled_p ())
1584            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1585                             "reduc phi. skip.\n");
1586          continue;
1587        }
1588
1589      /* Analyze the evolution function.  */
1590
1591      evolution_part
1592	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1593      if (evolution_part == NULL_TREE)
1594        {
1595	  if (dump_enabled_p ())
1596	    dump_printf (MSG_MISSED_OPTIMIZATION,
1597			 "No access function or evolution.\n");
1598	  return false;
1599        }
1600
1601      /* FORNOW: We do not transform initial conditions of IVs
1602	 which evolution functions are a polynomial of degree >= 2.  */
1603
1604      if (tree_is_chrec (evolution_part))
1605	return false;
1606    }
1607
1608  return true;
1609}
1610
1611
1612/*   Function vect_update_ivs_after_vectorizer.
1613
1614     "Advance" the induction variables of LOOP to the value they should take
1615     after the execution of LOOP.  This is currently necessary because the
1616     vectorizer does not handle induction variables that are used after the
1617     loop.  Such a situation occurs when the last iterations of LOOP are
1618     peeled, because:
1619     1. We introduced new uses after LOOP for IVs that were not originally used
1620        after LOOP: the IVs of LOOP are now used by an epilog loop.
1621     2. LOOP is going to be vectorized; this means that it will iterate N/VF
1622        times, whereas the loop IVs should be bumped N times.
1623
1624     Input:
1625     - LOOP - a loop that is going to be vectorized. The last few iterations
1626              of LOOP were peeled.
1627     - NITERS - the number of iterations that LOOP executes (before it is
1628                vectorized). i.e, the number of times the ivs should be bumped.
1629     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1630                  coming out from LOOP on which there are uses of the LOOP ivs
1631		  (this is the path from LOOP->exit to epilog_loop->preheader).
1632
1633                  The new definitions of the ivs are placed in LOOP->exit.
1634                  The phi args associated with the edge UPDATE_E in the bb
1635                  UPDATE_E->dest are updated accordingly.
1636
1637     Assumption 1: Like the rest of the vectorizer, this function assumes
1638     a single loop exit that has a single predecessor.
1639
1640     Assumption 2: The phi nodes in the LOOP header and in update_bb are
1641     organized in the same order.
1642
1643     Assumption 3: The access function of the ivs is simple enough (see
1644     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1645
1646     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1647     coming out of LOOP on which the ivs of LOOP are used (this is the path
1648     that leads to the epilog loop; other paths skip the epilog loop).  This
1649     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1650     needs to have its phis updated.
1651 */
1652
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gphi *phi, *phi1;
  gphi_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lock-step;
     assumption 2 above guarantees they are organized in the same
     order.  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi.phi ();
      phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "virtual phi. skip.\n");
	  continue;
	}

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
        {
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "reduc phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      /* The IV's value after NITERS iterations is
	 init_expr + niters * step_expr (pointer-plus for pointer IVs).  */
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      /* Emit the advanced IV definition at the end of the exit bb.  */
      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
					  true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
1737
1738/* Function vect_do_peeling_for_loop_bound
1739
1740   Peel the last iterations of the loop represented by LOOP_VINFO.
1741   The peeled iterations form a new epilog loop.  Given that the loop now
1742   iterates NITERS times, the new epilog loop iterates
1743   NITERS % VECTORIZATION_FACTOR times.
1744
1745   The original loop will later be made to iterate
1746   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1747
1748   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1749   test.  */
1750
1751void
1752vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1753				tree ni_name, tree ratio_mult_vf_name,
1754				unsigned int th, bool check_profitability)
1755{
1756  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1757  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1758  struct loop *new_loop;
1759  edge update_e;
1760  basic_block preheader;
1761  int loop_num;
1762  int max_iter;
1763  tree cond_expr = NULL_TREE;
1764  gimple_seq cond_expr_stmt_list = NULL;
1765
1766  if (dump_enabled_p ())
1767    dump_printf_loc (MSG_NOTE, vect_location,
1768                     "=== vect_do_peeling_for_loop_bound ===\n");
1769
1770  initialize_original_copy_tables ();
1771
1772  loop_num  = loop->num;
1773
1774  new_loop
1775    = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1776				     &ratio_mult_vf_name, ni_name, false,
1777				     th, check_profitability,
1778				     cond_expr, cond_expr_stmt_list,
1779				     0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1780  gcc_assert (new_loop);
1781  gcc_assert (loop_num == loop->num);
1782#ifdef ENABLE_CHECKING
1783  slpeel_verify_cfg_after_peeling (loop, new_loop);
1784#endif
1785
1786  /* A guard that controls whether the new_loop is to be executed or skipped
1787     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1788     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1789     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1790     is on the path where the LOOP IVs are used and need to be updated.  */
1791
1792  preheader = loop_preheader_edge (new_loop)->src;
1793  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1794    update_e = EDGE_PRED (preheader, 0);
1795  else
1796    update_e = EDGE_PRED (preheader, 1);
1797
1798  /* Update IVs of original loop as if they were advanced
1799     by ratio_mult_vf_name steps.  */
1800  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1801
1802  /* For vectorization factor N, we need to copy last N-1 values in epilogue
1803     and this means N-2 loopback edge executions.
1804
1805     PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1806     will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1807  max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1808	      ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1809	      : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1810  if (check_profitability)
1811    max_iter = MAX (max_iter, (int) th - 1);
1812  record_niter_bound (new_loop, max_iter, false, true);
1813  dump_printf (MSG_NOTE,
1814               "Setting upper bound of nb iterations for epilogue "
1815               "loop to %d\n", max_iter);
1816
1817  /* After peeling we have to reset scalar evolution analyzer.  */
1818  scev_reset ();
1819
1820  free_original_copy_tables ();
1821}
1822
1823
1824/* Function vect_gen_niters_for_prolog_loop
1825
1826   Set the number of iterations for the loop represented by LOOP_VINFO
1827   to the minimum between LOOP_NITERS (the original iteration count of the loop)
1828   and the misalignment of DR - the data reference recorded in
1829   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1830   this loop, the data reference DR will refer to an aligned location.
1831
1832   The following computation is generated:
1833
1834   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
1836   Else, compute address misalignment in bytes:
1837     addr_mis = addr & (vectype_align - 1)
1838
1839   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1840
1841   (elem_size = element type size; an element is the scalar element whose type
1842   is the inner type of the vectype)
1843
1844   When the step of the data-ref in the loop is not 1 (as in interleaved data
1845   and SLP), the number of iterations of the prolog must be divided by the step
1846   (which is equal to the size of interleaved group).
1847
1848   The above formulas assume that VF == number of elements in the vector. This
1849   may not hold when there are multiple-types in the loop.
1850   In this case, for some data-references in the loop the VF does not represent
1851   the number of elements that fit in the vector.  Therefore, instead of VF we
1852   use TYPE_VECTOR_SUBPARTS.  */
1853
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment expressed in bytes rather than bits.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  pe = loop_preheader_edge (loop);

  /* If the peel count was decided at compile time, just emit it as a
     constant and record it as the static bound.  */
  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      /* Misalignment is only known at run time: compute the peel count
	 from the actual start address of the unaligned data-ref.  */
      gimple_seq new_stmts = NULL;
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      /* For a negative step the vector access starts SUBPARTS - 1
	 elements before the recorded base; adjust the address we take
	 the misalignment of accordingly.  */
      tree offset = negative
	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						&new_stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      /* Division by the (power-of-two) element size is done as a right
	 shift by its log2.  */
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* The address computation must go on the preheader edge; it must not
	 create a new basic block there.  */
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      /* Masking with nelements - 1 keeps the result in [0, nelements - 1];
	 record nelements as the (exclusive) upper bound.  */
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements;
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
1952
1953
1954/* Function vect_update_init_of_dr
1955
1956   NITERS iterations were peeled from LOOP.  DR represents a data reference
1957   in LOOP.  This function updates the information recorded in DR to
1958   account for the fact that the first NITERS iterations had already been
1959   executed.  Specifically, it updates the OFFSET field of DR.  */
1960
1961static void
1962vect_update_init_of_dr (struct data_reference *dr, tree niters)
1963{
1964  tree offset = DR_OFFSET (dr);
1965
1966  niters = fold_build2 (MULT_EXPR, sizetype,
1967			fold_convert (sizetype, niters),
1968			fold_convert (sizetype, DR_STEP (dr)));
1969  offset = fold_build2 (PLUS_EXPR, sizetype,
1970			fold_convert (sizetype, offset), niters);
1971  DR_OFFSET (dr) = offset;
1972}
1973
1974
1975/* Function vect_update_inits_of_drs
1976
1977   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1978   This function updates the information recorded for the data references in
1979   the loop to account for the fact that the first NITERS iterations had
1980   already been executed.  Specifically, it updates the initial_condition of
1981   the access_function of all the data_references in the loop.  */
1982
1983static void
1984vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1985{
1986  unsigned int i;
1987  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1988  struct data_reference *dr;
1989
1990 if (dump_enabled_p ())
1991    dump_printf_loc (MSG_NOTE, vect_location,
1992                     "=== vect_update_inits_of_dr ===\n");
1993
1994  FOR_EACH_VEC_ELT (datarefs, i, dr)
1995    vect_update_init_of_dr (dr, niters);
1996}
1997
1998
1999/* Function vect_do_peeling_for_alignment
2000
2001   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2002   'niters' is set to the misalignment of one of the data references in the
2003   loop, thereby forcing it to refer to an aligned location at the beginning
2004   of the execution of this loop.  The data reference for which we are
2005   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2006
2007void
2008vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2009			       unsigned int th, bool check_profitability)
2010{
2011  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2012  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2013  tree niters_of_prolog_loop;
2014  tree wide_prolog_niters;
2015  struct loop *new_loop;
2016  int max_iter;
2017  int bound = 0;
2018
2019  if (dump_enabled_p ())
2020    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2021                     "loop peeled for vectorization to enhance"
2022                     " alignment\n");
2023
2024  initialize_original_copy_tables ();
2025
2026  gimple_seq stmts = NULL;
2027  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2028  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2029							   ni_name,
2030							   &bound);
2031
2032  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2033  new_loop =
2034    slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2035				   loop_preheader_edge (loop),
2036				   &niters_of_prolog_loop, ni_name, true,
2037				   th, check_profitability, NULL_TREE, NULL,
2038				   bound, 0);
2039
2040  gcc_assert (new_loop);
2041#ifdef ENABLE_CHECKING
2042  slpeel_verify_cfg_after_peeling (new_loop, loop);
2043#endif
2044  /* For vectorization factor N, we need to copy at most N-1 values
2045     for alignment and this means N-2 loopback edge executions.  */
2046  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2047  if (check_profitability)
2048    max_iter = MAX (max_iter, (int) th - 1);
2049  record_niter_bound (new_loop, max_iter, false, true);
2050  dump_printf (MSG_NOTE,
2051               "Setting upper bound of nb iterations for prologue "
2052               "loop to %d\n", max_iter);
2053
2054  /* Update number of times loop executes.  */
2055  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2056		TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2057  LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2058		TREE_TYPE (ni_name),
2059		LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2060
2061  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2062    wide_prolog_niters = niters_of_prolog_loop;
2063  else
2064    {
2065      gimple_seq seq = NULL;
2066      edge pe = loop_preheader_edge (loop);
2067      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2068      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2069      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2070                                                 var);
2071      if (seq)
2072	{
2073	  /* Insert stmt on loop preheader edge.  */
2074          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2075          gcc_assert (!new_bb);
2076        }
2077    }
2078
2079  /* Update the init conditions of the access functions of all data refs.  */
2080  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2081
2082  /* After peeling we have to reset scalar evolution analyzer.  */
2083  scev_reset ();
2084
2085  free_original_copy_tables ();
2086}
2087
2088
2089/* Function vect_create_cond_for_align_checks.
2090
2091   Create a conditional expression that represents the alignment checks for
2092   all of data references (array element references) whose alignment must be
2093   checked at runtime.
2094
2095   Input:
2096   COND_EXPR  - input conditional expression.  New conditions will be chained
2097                with logical AND operation.
2098   LOOP_VINFO - two fields of the loop information are used.
2099                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2100                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2101
2102   Output:
2103   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2104                         expression.
2105   The returned value is the conditional expression to be used in the if
2106   statement that controls which version of the loop gets executed at runtime.
2107
2108   The algorithm makes two assumptions:
2109     1) The number of bytes "n" in a vector is a power of 2.
2110     2) An address "a" is aligned if a%n is zero and that this
2111        test can be done as a&(n-1) == 0.  For example, for 16
2112        byte vectors the test is a&0xf == 0.  */
2113
2114static void
2115vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2116                                   tree *cond_expr,
2117				   gimple_seq *cond_expr_stmt_list)
2118{
2119  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2120  vec<gimple> may_misalign_stmts
2121    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2122  gimple ref_stmt;
2123  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2124  tree mask_cst;
2125  unsigned int i;
2126  tree int_ptrsize_type;
2127  char tmp_name[20];
2128  tree or_tmp_name = NULL_TREE;
2129  tree and_tmp_name;
2130  gimple and_stmt;
2131  tree ptrsize_zero;
2132  tree part_cond_expr;
2133
2134  /* Check that mask is one less than a power of 2, i.e., mask is
2135     all zeros followed by all ones.  */
2136  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2137
2138  int_ptrsize_type = signed_type_for (ptr_type_node);
2139
2140  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2141     of the first vector of the i'th data reference. */
2142
2143  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2144    {
2145      gimple_seq new_stmt_list = NULL;
2146      tree addr_base;
2147      tree addr_tmp_name;
2148      tree new_or_tmp_name;
2149      gimple addr_stmt, or_stmt;
2150      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2151      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2152      bool negative = tree_int_cst_compare
2153	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2154      tree offset = negative
2155	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2156
2157      /* create: addr_tmp = (int)(address_of_first_vector) */
2158      addr_base =
2159	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2160					      offset, loop);
2161      if (new_stmt_list != NULL)
2162	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2163
2164      sprintf (tmp_name, "addr2int%d", i);
2165      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2166      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2167      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2168
2169      /* The addresses are OR together.  */
2170
2171      if (or_tmp_name != NULL_TREE)
2172        {
2173          /* create: or_tmp = or_tmp | addr_tmp */
2174          sprintf (tmp_name, "orptrs%d", i);
2175	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2176	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2177					 or_tmp_name, addr_tmp_name);
2178	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2179          or_tmp_name = new_or_tmp_name;
2180        }
2181      else
2182        or_tmp_name = addr_tmp_name;
2183
2184    } /* end for i */
2185
2186  mask_cst = build_int_cst (int_ptrsize_type, mask);
2187
2188  /* create: and_tmp = or_tmp & mask  */
2189  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2190
2191  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2192				  or_tmp_name, mask_cst);
2193  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2194
2195  /* Make and_tmp the left operand of the conditional test against zero.
2196     if and_tmp has a nonzero bit then some address is unaligned.  */
2197  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2198  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2199				and_tmp_name, ptrsize_zero);
2200  if (*cond_expr)
2201    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2202			      *cond_expr, part_cond_expr);
2203  else
2204    *cond_expr = part_cond_expr;
2205}
2206
2207/* Function vect_create_cond_for_alias_checks.
2208
2209   Create a conditional expression that represents the run-time checks for
2210   overlapping of address ranges represented by a list of data references
2211   relations passed as input.
2212
2213   Input:
2214   COND_EXPR  - input conditional expression.  New conditions will be chained
2215                with logical AND operation.  If it is NULL, then the function
2216                is used to return the number of alias checks.
2217   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2218	        to be checked.
2219
2220   Output:
2221   COND_EXPR - conditional expression.
2222
2223   The returned COND_EXPR is the conditional expression to be used in the if
2224   statement that controls which version of the loop gets executed at runtime.
2225*/
2226
void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */

  if (comp_alias_ddrs.is_empty ())
    return;

  /* Emit one disjointness check per pair of potentially-aliasing
     data references.  */
  for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
    {
      const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
      const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
      tree segment_length_a = dr_a.seg_len;
      tree segment_length_b = dr_b.seg_len;

      tree addr_base_a
	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
      tree addr_base_b
	= fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "create runtime check for data references ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
	  dump_printf (MSG_NOTE, " and ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Segment A occupies [seg_a_min, seg_a_max).  */
      tree seg_a_min = addr_base_a;
      tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
      /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
	 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
	 [a, a+12) */
      if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
	{
	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
	  seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
	  seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
	}

      /* Likewise segment B, with the same negative-step adjustment.  */
      tree seg_b_min = addr_base_b;
      tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
      if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
	{
	  tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
	  seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
	  seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
	}

      /* The segments are disjoint iff one ends before the other begins.  */
      part_cond_expr =
      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));

      /* AND this pair's check onto the condition built so far.  */
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());

  comp_alias_ddrs.release ();
}
2308
2309
2310/* Function vect_loop_versioning.
2311
2312   If the loop has data references that may or may not be aligned or/and
2313   has data reference relations whose independence was not proven then
2314   two versions of the loop need to be generated, one which is vectorized
2315   and one which isn't.  A test is then generated to control which of the
2316   loops is executed.  The test checks for the alignment of all of the
2317   data references that may or may not be aligned.  An additional
2318   sequence of runtime tests is generated for each pairs of DDRs whose
2319   independence was not proven.  The vectorized version of loop is
2320   executed only if both alias and alignment tests are passed.
2321
2322   The test generated to check which version of loop is executed
2323   is modified to also check for profitability as indicated by the
2324   cost model initially.
2325
2326   The versioning precondition(s) are placed in *COND_EXPR and
2327   *COND_EXPR_STMT_LIST.  */
2328
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  /* Branch probability for taking the vectorized version: 80%.  */
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Optionally require the iteration count to exceed the profitability
     threshold TH before the vectorized version is taken.  */
  if (check_profitability)
    {
      cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
			       build_int_cst (TREE_TYPE (scalar_loop_iters), th));
      cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					  is_gimple_condexpr, NULL_TREE);
    }

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

  /* Gimplify the combined condition and collect any statements it needs
     so they can be emitted in the condition block below.  */
  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
	 scale LOOP's frequencies instead.  */
      loop_version (scalar_loop, cond_expr, &condition_bb,
		    prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
      scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      /* Rewire the edges so CONDITION_BB sits above LOOP's preheader and
	 its fall-through side reaches SCALAR_LOOP's preheader.  */
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      /* Re-establish dominators invalidated by the edge redirections.  */
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       condition_bb);
    }
  else
    loop_version (loop, cond_expr, &condition_bb,
		  prob, prob, REG_BR_PROB_BASE - prob, true);

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop versioned for vectorization to enhance "
			 "alignment\n");

    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors). Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will have already just a single successor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in MERGE_BB, create a corresponding phi in the new
	 exit block and make the original phi use its result instead.  */
      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  /* Emit the statements computing the versioning condition just before
     the branch in CONDITION_BB.  */
  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}
2465