1169689Skan/* Loop Vectorization
2169689Skan   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3169689Skan   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4169689Skan
5169689SkanThis file is part of GCC.
6169689Skan
7169689SkanGCC is free software; you can redistribute it and/or modify it under
8169689Skanthe terms of the GNU General Public License as published by the Free
9169689SkanSoftware Foundation; either version 2, or (at your option) any later
10169689Skanversion.
11169689Skan
12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
14169689SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15169689Skanfor more details.
16169689Skan
17169689SkanYou should have received a copy of the GNU General Public License
18169689Skanalong with GCC; see the file COPYING.  If not, write to the Free
19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan02110-1301, USA.  */
21169689Skan
22169689Skan/* Loop Vectorization Pass.
23169689Skan
24169689Skan   This pass tries to vectorize loops. This first implementation focuses on
25169689Skan   simple inner-most loops, with no conditional control flow, and a set of
26169689Skan   simple operations which vector form can be expressed using existing
27169689Skan   tree codes (PLUS, MULT etc).
28169689Skan
29169689Skan   For example, the vectorizer transforms the following simple loop:
30169689Skan
31169689Skan	short a[N]; short b[N]; short c[N]; int i;
32169689Skan
33169689Skan	for (i=0; i<N; i++){
34169689Skan	  a[i] = b[i] + c[i];
35169689Skan	}
36169689Skan
37169689Skan   as if it was manually vectorized by rewriting the source code into:
38169689Skan
39169689Skan	typedef int __attribute__((mode(V8HI))) v8hi;
40169689Skan	short a[N];  short b[N]; short c[N];   int i;
41169689Skan	v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42169689Skan	v8hi va, vb, vc;
43169689Skan
44169689Skan	for (i=0; i<N/8; i++){
45169689Skan	  vb = pb[i];
46169689Skan	  vc = pc[i];
47169689Skan	  va = vb + vc;
48169689Skan	  pa[i] = va;
49169689Skan	}
50169689Skan
51169689Skan	The main entry to this pass is vectorize_loops(), in which
52169689Skan   the vectorizer applies a set of analyses on a given set of loops,
53169689Skan   followed by the actual vectorization transformation for the loops that
54169689Skan   had successfully passed the analysis phase.
55169689Skan
56169689Skan	Throughout this pass we make a distinction between two types of
57169689Skan   data: scalars (which are represented by SSA_NAMES), and memory references
58169689Skan   ("data-refs"). These two types of data require different handling both
59169689Skan   during analysis and transformation. The types of data-refs that the
60169689Skan   vectorizer currently supports are ARRAY_REFS which base is an array DECL
61169689Skan   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62169689Skan   accesses are required to have a  simple (consecutive) access pattern.
63169689Skan
64169689Skan   Analysis phase:
65169689Skan   ===============
66169689Skan	The driver for the analysis phase is vect_analyze_loop_nest().
67169689Skan   It applies a set of analyses, some of which rely on the scalar evolution
68169689Skan   analyzer (scev) developed by Sebastian Pop.
69169689Skan
70169689Skan	During the analysis phase the vectorizer records some information
71169689Skan   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72169689Skan   loop, as well as general information about the loop as a whole, which is
73169689Skan   recorded in a "loop_vec_info" struct attached to each loop.
74169689Skan
75169689Skan   Transformation phase:
76169689Skan   =====================
77169689Skan	The loop transformation phase scans all the stmts in the loop, and
78169689Skan   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79169689Skan   the loop that needs to be vectorized. It insert the vector code sequence
80169689Skan   just before the scalar stmt S, and records a pointer to the vector code
81169689Skan   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82169689Skan   attached to S). This pointer will be used for the vectorization of following
83169689Skan   stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84169689Skan   otherwise, we rely on dead code elimination for removing it.
85169689Skan
86169689Skan	For example, say stmt S1 was vectorized into stmt VS1:
87169689Skan
88169689Skan   VS1: vb = px[i];
89169689Skan   S1:	b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90169689Skan   S2:  a = b;
91169689Skan
92169689Skan   To vectorize stmt S2, the vectorizer first finds the stmt that defines
93169689Skan   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94169689Skan   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95169689Skan   resulting sequence would be:
96169689Skan
97169689Skan   VS1: vb = px[i];
98169689Skan   S1:	b = x[i];	STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99169689Skan   VS2: va = vb;
100169689Skan   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101169689Skan
102169689Skan	Operands that are not SSA_NAMEs, are data-refs that appear in
103169689Skan   load/store operations (like 'x[i]' in S1), and are handled differently.
104169689Skan
105169689Skan   Target modeling:
106169689Skan   =================
107169689Skan	Currently the only target specific information that is used is the
108169689Skan   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109169689Skan   support different sizes of vectors, for now will need to specify one value
110169689Skan   for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111169689Skan
112169689Skan	Since we only vectorize operations which vector form can be
113169689Skan   expressed using existing tree codes, to verify that an operation is
114169689Skan   supported, the vectorizer checks the relevant optab at the relevant
115169689Skan   machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116169689Skan   the value found is CODE_FOR_nothing, then there's no target support, and
117169689Skan   we can't vectorize the stmt.
118169689Skan
119169689Skan   For additional information on this project see:
120169689Skan   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121169689Skan*/
122169689Skan
123169689Skan#include "config.h"
124169689Skan#include "system.h"
125169689Skan#include "coretypes.h"
126169689Skan#include "tm.h"
127169689Skan#include "ggc.h"
128169689Skan#include "tree.h"
129169689Skan#include "target.h"
130169689Skan#include "rtl.h"
131169689Skan#include "basic-block.h"
132169689Skan#include "diagnostic.h"
133169689Skan#include "tree-flow.h"
134169689Skan#include "tree-dump.h"
135169689Skan#include "timevar.h"
136169689Skan#include "cfgloop.h"
137169689Skan#include "cfglayout.h"
138169689Skan#include "expr.h"
139169689Skan#include "optabs.h"
140169689Skan#include "params.h"
141169689Skan#include "toplev.h"
142169689Skan#include "tree-chrec.h"
143169689Skan#include "tree-data-ref.h"
144169689Skan#include "tree-scalar-evolution.h"
145169689Skan#include "input.h"
146169689Skan#include "tree-vectorizer.h"
147169689Skan#include "tree-pass.h"
148169689Skan
149169689Skan/*************************************************************************
150169689Skan  Simple Loop Peeling Utilities
151169689Skan *************************************************************************/
152169689Skanstatic struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
153169689Skan  (struct loop *, struct loops *, edge);
154169689Skanstatic void slpeel_update_phis_for_duplicate_loop
155169689Skan  (struct loop *, struct loop *, bool after);
156169689Skanstatic void slpeel_update_phi_nodes_for_guard1
157169689Skan  (edge, struct loop *, bool, basic_block *, bitmap *);
158169689Skanstatic void slpeel_update_phi_nodes_for_guard2
159169689Skan  (edge, struct loop *, bool, basic_block *);
160169689Skanstatic edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
161169689Skan
162169689Skanstatic void rename_use_op (use_operand_p);
163169689Skanstatic void rename_variables_in_bb (basic_block);
164169689Skanstatic void rename_variables_in_loop (struct loop *);
165169689Skan
166169689Skan/*************************************************************************
167169689Skan  General Vectorization Utilities
168169689Skan *************************************************************************/
169169689Skanstatic void vect_set_dump_settings (void);
170169689Skan
171169689Skan/* vect_dump will be set to stderr or dump_file if exist.  */
172169689SkanFILE *vect_dump;
173169689Skan
174169689Skan/* vect_verbosity_level set to an invalid value
175169689Skan   to mark that it's uninitialized.  */
176169689Skanenum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
177169689Skan
178169689Skan/* Number of loops, at the beginning of vectorization.  */
179169689Skanunsigned int vect_loops_num;
180169689Skan
181169689Skan/* Loop location.  */
182169689Skanstatic LOC vect_loop_location;
183169689Skan
184169689Skan/* Bitmap of virtual variables to be renamed.  */
185169689Skanbitmap vect_vnames_to_rename;
186169689Skan
187169689Skan/*************************************************************************
188169689Skan  Simple Loop Peeling Utilities
189169689Skan
190169689Skan  Utilities to support loop peeling for vectorization purposes.
191169689Skan *************************************************************************/
192169689Skan
193169689Skan
194169689Skan/* Renames the use *OP_P.  */
195169689Skan
196169689Skanstatic void
197169689Skanrename_use_op (use_operand_p op_p)
198169689Skan{
199169689Skan  tree new_name;
200169689Skan
201169689Skan  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
202169689Skan    return;
203169689Skan
204169689Skan  new_name = get_current_def (USE_FROM_PTR (op_p));
205169689Skan
206169689Skan  /* Something defined outside of the loop.  */
207169689Skan  if (!new_name)
208169689Skan    return;
209169689Skan
210169689Skan  /* An ordinary ssa name defined in the loop.  */
211169689Skan
212169689Skan  SET_USE (op_p, new_name);
213169689Skan}
214169689Skan
215169689Skan
216169689Skan/* Renames the variables in basic block BB.  */
217169689Skan
218169689Skanstatic void
219169689Skanrename_variables_in_bb (basic_block bb)
220169689Skan{
221169689Skan  tree phi;
222169689Skan  block_stmt_iterator bsi;
223169689Skan  tree stmt;
224169689Skan  use_operand_p use_p;
225169689Skan  ssa_op_iter iter;
226169689Skan  edge e;
227169689Skan  edge_iterator ei;
228169689Skan  struct loop *loop = bb->loop_father;
229169689Skan
230169689Skan  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
231169689Skan    {
232169689Skan      stmt = bsi_stmt (bsi);
233169689Skan      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
234169689Skan				 (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
235169689Skan	rename_use_op (use_p);
236169689Skan    }
237169689Skan
238169689Skan  FOR_EACH_EDGE (e, ei, bb->succs)
239169689Skan    {
240169689Skan      if (!flow_bb_inside_loop_p (loop, e->dest))
241169689Skan	continue;
242169689Skan      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
243169689Skan        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
244169689Skan    }
245169689Skan}
246169689Skan
247169689Skan
248169689Skan/* Renames variables in new generated LOOP.  */
249169689Skan
250169689Skanstatic void
251169689Skanrename_variables_in_loop (struct loop *loop)
252169689Skan{
253169689Skan  unsigned i;
254169689Skan  basic_block *bbs;
255169689Skan
256169689Skan  bbs = get_loop_body (loop);
257169689Skan
258169689Skan  for (i = 0; i < loop->num_nodes; i++)
259169689Skan    rename_variables_in_bb (bbs[i]);
260169689Skan
261169689Skan  free (bbs);
262169689Skan}
263169689Skan
264169689Skan
265169689Skan/* Update the PHI nodes of NEW_LOOP.
266169689Skan
267169689Skan   NEW_LOOP is a duplicate of ORIG_LOOP.
268169689Skan   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
269169689Skan   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
270169689Skan   executes before it.  */
271169689Skan
272169689Skanstatic void
273169689Skanslpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
274169689Skan				       struct loop *new_loop, bool after)
275169689Skan{
276169689Skan  tree new_ssa_name;
277169689Skan  tree phi_new, phi_orig;
278169689Skan  tree def;
279169689Skan  edge orig_loop_latch = loop_latch_edge (orig_loop);
280169689Skan  edge orig_entry_e = loop_preheader_edge (orig_loop);
281169689Skan  edge new_loop_exit_e = new_loop->single_exit;
282169689Skan  edge new_loop_entry_e = loop_preheader_edge (new_loop);
283169689Skan  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
284169689Skan
285169689Skan  /*
286169689Skan     step 1. For each loop-header-phi:
287169689Skan             Add the first phi argument for the phi in NEW_LOOP
288169689Skan            (the one associated with the entry of NEW_LOOP)
289169689Skan
290169689Skan     step 2. For each loop-header-phi:
291169689Skan             Add the second phi argument for the phi in NEW_LOOP
292169689Skan            (the one associated with the latch of NEW_LOOP)
293169689Skan
294169689Skan     step 3. Update the phis in the successor block of NEW_LOOP.
295169689Skan
296169689Skan        case 1: NEW_LOOP was placed before ORIG_LOOP:
297169689Skan                The successor block of NEW_LOOP is the header of ORIG_LOOP.
298169689Skan                Updating the phis in the successor block can therefore be done
299169689Skan                along with the scanning of the loop header phis, because the
300169689Skan                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
301169689Skan                phi nodes, organized in the same order.
302169689Skan
303169689Skan        case 2: NEW_LOOP was placed after ORIG_LOOP:
304169689Skan                The successor block of NEW_LOOP is the original exit block of
305169689Skan                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
306169689Skan                We postpone updating these phis to a later stage (when
307169689Skan                loop guards are added).
308169689Skan   */
309169689Skan
310169689Skan
311169689Skan  /* Scan the phis in the headers of the old and new loops
312169689Skan     (they are organized in exactly the same order).  */
313169689Skan
314169689Skan  for (phi_new = phi_nodes (new_loop->header),
315169689Skan       phi_orig = phi_nodes (orig_loop->header);
316169689Skan       phi_new && phi_orig;
317169689Skan       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
318169689Skan    {
319169689Skan      /* step 1.  */
320169689Skan      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
321169689Skan      add_phi_arg (phi_new, def, new_loop_entry_e);
322169689Skan
323169689Skan      /* step 2.  */
324169689Skan      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
325169689Skan      if (TREE_CODE (def) != SSA_NAME)
326169689Skan        continue;
327169689Skan
328169689Skan      new_ssa_name = get_current_def (def);
329169689Skan      if (!new_ssa_name)
330169689Skan	{
331169689Skan	  /* This only happens if there are no definitions
332169689Skan	     inside the loop. use the phi_result in this case.  */
333169689Skan	  new_ssa_name = PHI_RESULT (phi_new);
334169689Skan	}
335169689Skan
336169689Skan      /* An ordinary ssa name defined in the loop.  */
337169689Skan      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
338169689Skan
339169689Skan      /* step 3 (case 1).  */
340169689Skan      if (!after)
341169689Skan        {
342169689Skan          gcc_assert (new_loop_exit_e == orig_entry_e);
343169689Skan          SET_PHI_ARG_DEF (phi_orig,
344169689Skan                           new_loop_exit_e->dest_idx,
345169689Skan                           new_ssa_name);
346169689Skan        }
347169689Skan    }
348169689Skan}
349169689Skan
350169689Skan
351169689Skan/* Update PHI nodes for a guard of the LOOP.
352169689Skan
353169689Skan   Input:
354169689Skan   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
355169689Skan        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
356169689Skan        originates from the guard-bb, skips LOOP and reaches the (unique) exit
357169689Skan        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
358169689Skan        We denote this bb NEW_MERGE_BB because before the guard code was added
359169689Skan        it had a single predecessor (the LOOP header), and now it became a merge
360169689Skan        point of two paths - the path that ends with the LOOP exit-edge, and
361169689Skan        the path that ends with GUARD_EDGE.
362169689Skan   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
363169689Skan        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
364169689Skan
365169689Skan   ===> The CFG before the guard-code was added:
366169689Skan        LOOP_header_bb:
367169689Skan          loop_body
368169689Skan          if (exit_loop) goto update_bb
369169689Skan          else           goto LOOP_header_bb
370169689Skan        update_bb:
371169689Skan
372169689Skan   ==> The CFG after the guard-code was added:
373169689Skan        guard_bb:
374169689Skan          if (LOOP_guard_condition) goto new_merge_bb
375169689Skan          else                      goto LOOP_header_bb
376169689Skan        LOOP_header_bb:
377169689Skan          loop_body
378169689Skan          if (exit_loop_condition) goto new_merge_bb
379169689Skan          else                     goto LOOP_header_bb
380169689Skan        new_merge_bb:
381169689Skan          goto update_bb
382169689Skan        update_bb:
383169689Skan
384169689Skan   ==> The CFG after this function:
385169689Skan        guard_bb:
386169689Skan          if (LOOP_guard_condition) goto new_merge_bb
387169689Skan          else                      goto LOOP_header_bb
388169689Skan        LOOP_header_bb:
389169689Skan          loop_body
390169689Skan          if (exit_loop_condition) goto new_exit_bb
391169689Skan          else                     goto LOOP_header_bb
392169689Skan        new_exit_bb:
393169689Skan        new_merge_bb:
394169689Skan          goto update_bb
395169689Skan        update_bb:
396169689Skan
397169689Skan   This function:
398169689Skan   1. creates and updates the relevant phi nodes to account for the new
399169689Skan      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
400169689Skan      1.1. Create phi nodes at NEW_MERGE_BB.
401169689Skan      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
402169689Skan           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
403169689Skan   2. preserves loop-closed-ssa-form by creating the required phi nodes
404169689Skan      at the exit of LOOP (i.e, in NEW_EXIT_BB).
405169689Skan
406169689Skan   There are two flavors to this function:
407169689Skan
408169689Skan   slpeel_update_phi_nodes_for_guard1:
409169689Skan     Here the guard controls whether we enter or skip LOOP, where LOOP is a
410169689Skan     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
411169689Skan     for variables that have phis in the loop header.
412169689Skan
413169689Skan   slpeel_update_phi_nodes_for_guard2:
414169689Skan     Here the guard controls whether we enter or skip LOOP, where LOOP is an
415169689Skan     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
416169689Skan     for variables that have phis in the loop exit.
417169689Skan
418169689Skan   I.E., the overall structure is:
419169689Skan
420169689Skan        loop1_preheader_bb:
421169689Skan                guard1 (goto loop1/merg1_bb)
422169689Skan        loop1
423169689Skan        loop1_exit_bb:
424169689Skan                guard2 (goto merge1_bb/merge2_bb)
425169689Skan        merge1_bb
426169689Skan        loop2
427169689Skan        loop2_exit_bb
428169689Skan        merge2_bb
429169689Skan        next_bb
430169689Skan
431169689Skan   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
432169689Skan   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
433169689Skan   that have phis in loop1->header).
434169689Skan
435169689Skan   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
436169689Skan   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
437169689Skan   that have phis in next_bb). It also adds some of these phis to
438169689Skan   loop1_exit_bb.
439169689Skan
440169689Skan   slpeel_update_phi_nodes_for_guard1 is always called before
441169689Skan   slpeel_update_phi_nodes_for_guard2. They are both needed in order
442169689Skan   to create correct data-flow and loop-closed-ssa-form.
443169689Skan
444169689Skan   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
445169689Skan   that change between iterations of a loop (and therefore have a phi-node
446169689Skan   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
447169689Skan   phis for variables that are used out of the loop (and therefore have
448169689Skan   loop-closed exit phis). Some variables may be both updated between
449169689Skan   iterations and used after the loop. This is why in loop1_exit_bb we
450169689Skan   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
451169689Skan   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
452169689Skan
453169689Skan   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
454169689Skan     an original loop. i.e., we have:
455169689Skan
456169689Skan           orig_loop
457169689Skan           guard_bb (goto LOOP/new_merge)
458169689Skan           new_loop <-- LOOP
459169689Skan           new_exit
460169689Skan           new_merge
461169689Skan           next_bb
462169689Skan
463169689Skan     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
464169689Skan     have:
465169689Skan
466169689Skan           new_loop
467169689Skan           guard_bb (goto LOOP/new_merge)
468169689Skan           orig_loop <-- LOOP
469169689Skan           new_exit
470169689Skan           new_merge
471169689Skan           next_bb
472169689Skan
473169689Skan     The SSA names defined in the original loop have a current
474169689Skan     reaching definition that that records the corresponding new
475169689Skan     ssa-name used in the new duplicated loop copy.
476169689Skan  */
477169689Skan
478169689Skan/* Function slpeel_update_phi_nodes_for_guard1
479169689Skan
480169689Skan   Input:
481169689Skan   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
482169689Skan   - DEFS - a bitmap of ssa names to mark new names for which we recorded
483169689Skan            information.
484169689Skan
485169689Skan   In the context of the overall structure, we have:
486169689Skan
487169689Skan        loop1_preheader_bb:
488169689Skan                guard1 (goto loop1/merg1_bb)
489169689SkanLOOP->  loop1
490169689Skan        loop1_exit_bb:
491169689Skan                guard2 (goto merge1_bb/merge2_bb)
492169689Skan        merge1_bb
493169689Skan        loop2
494169689Skan        loop2_exit_bb
495169689Skan        merge2_bb
496169689Skan        next_bb
497169689Skan
498169689Skan   For each name updated between loop iterations (i.e - for each name that has
499169689Skan   an entry (loop-header) phi in LOOP) we create a new phi in:
500169689Skan   1. merge1_bb (to account for the edge from guard1)
501169689Skan   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
502169689Skan*/
503169689Skan
504169689Skanstatic void
505169689Skanslpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
506169689Skan                                    bool is_new_loop, basic_block *new_exit_bb,
507169689Skan                                    bitmap *defs)
508169689Skan{
509169689Skan  tree orig_phi, new_phi;
510169689Skan  tree update_phi, update_phi2;
511169689Skan  tree guard_arg, loop_arg;
512169689Skan  basic_block new_merge_bb = guard_edge->dest;
513169689Skan  edge e = EDGE_SUCC (new_merge_bb, 0);
514169689Skan  basic_block update_bb = e->dest;
515169689Skan  basic_block orig_bb = loop->header;
516169689Skan  edge new_exit_e;
517169689Skan  tree current_new_name;
518169689Skan  tree name;
519169689Skan
520169689Skan  /* Create new bb between loop and new_merge_bb.  */
521169689Skan  *new_exit_bb = split_edge (loop->single_exit);
522169689Skan  add_bb_to_loop (*new_exit_bb, loop->outer);
523169689Skan
524169689Skan  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
525169689Skan
526169689Skan  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
527169689Skan       orig_phi && update_phi;
528169689Skan       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
529169689Skan    {
530169689Skan      /* Virtual phi; Mark it for renaming. We actually want to call
531169689Skan	 mar_sym_for_renaming, but since all ssa renaming datastructures
532169689Skan	 are going to be freed before we get to call ssa_upate, we just
533169689Skan	 record this name for now in a bitmap, and will mark it for
534169689Skan	 renaming later.  */
535169689Skan      name = PHI_RESULT (orig_phi);
536169689Skan      if (!is_gimple_reg (SSA_NAME_VAR (name)))
537169689Skan        bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
538169689Skan
539169689Skan      /** 1. Handle new-merge-point phis  **/
540169689Skan
541169689Skan      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
542169689Skan      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
543169689Skan                                 new_merge_bb);
544169689Skan
545169689Skan      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
546169689Skan            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
547169689Skan      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
548169689Skan      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
549169689Skan
550169689Skan      add_phi_arg (new_phi, loop_arg, new_exit_e);
551169689Skan      add_phi_arg (new_phi, guard_arg, guard_edge);
552169689Skan
553169689Skan      /* 1.3. Update phi in successor block.  */
554169689Skan      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
555169689Skan                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
556169689Skan      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
557169689Skan      update_phi2 = new_phi;
558169689Skan
559169689Skan
560169689Skan      /** 2. Handle loop-closed-ssa-form phis  **/
561169689Skan
562169689Skan      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
563169689Skan      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
564169689Skan                                 *new_exit_bb);
565169689Skan
566169689Skan      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
567169689Skan      add_phi_arg (new_phi, loop_arg, loop->single_exit);
568169689Skan
569169689Skan      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
570169689Skan      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
571169689Skan      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
572169689Skan
573169689Skan      /* 2.4. Record the newly created name with set_current_def.
574169689Skan         We want to find a name such that
575169689Skan                name = get_current_def (orig_loop_name)
576169689Skan         and to set its current definition as follows:
577169689Skan                set_current_def (name, new_phi_name)
578169689Skan
579169689Skan         If LOOP is a new loop then loop_arg is already the name we're
580169689Skan         looking for. If LOOP is the original loop, then loop_arg is
581169689Skan         the orig_loop_name and the relevant name is recorded in its
582169689Skan         current reaching definition.  */
583169689Skan      if (is_new_loop)
584169689Skan        current_new_name = loop_arg;
585169689Skan      else
586169689Skan        {
587169689Skan          current_new_name = get_current_def (loop_arg);
588169689Skan	  /* current_def is not available only if the variable does not
589169689Skan	     change inside the loop, in which case we also don't care
590169689Skan	     about recording a current_def for it because we won't be
591169689Skan	     trying to create loop-exit-phis for it.  */
592169689Skan	  if (!current_new_name)
593169689Skan	    continue;
594169689Skan        }
595169689Skan      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
596169689Skan
597169689Skan      set_current_def (current_new_name, PHI_RESULT (new_phi));
598169689Skan      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
599169689Skan    }
600169689Skan
601169689Skan  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
602169689Skan}
603169689Skan
604169689Skan
605169689Skan/* Function slpeel_update_phi_nodes_for_guard2
606169689Skan
607169689Skan   Input:
608169689Skan   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
609169689Skan
610169689Skan   In the context of the overall structure, we have:
611169689Skan
612169689Skan        loop1_preheader_bb:
613169689Skan                guard1 (goto loop1/merg1_bb)
614169689Skan        loop1
615169689Skan        loop1_exit_bb:
616169689Skan                guard2 (goto merge1_bb/merge2_bb)
617169689Skan        merge1_bb
618169689SkanLOOP->  loop2
619169689Skan        loop2_exit_bb
620169689Skan        merge2_bb
621169689Skan        next_bb
622169689Skan
623169689Skan   For each name used out side the loop (i.e - for each name that has an exit
624169689Skan   phi in next_bb) we create a new phi in:
625169689Skan   1. merge2_bb (to account for the edge from guard_bb)
626169689Skan   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
627169689Skan   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
628169689Skan      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
629169689Skan*/
630169689Skan
631169689Skanstatic void
632169689Skanslpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
633169689Skan                                    bool is_new_loop, basic_block *new_exit_bb)
634169689Skan{
635169689Skan  tree orig_phi, new_phi;
636169689Skan  tree update_phi, update_phi2;
637169689Skan  tree guard_arg, loop_arg;
638169689Skan  basic_block new_merge_bb = guard_edge->dest;
639169689Skan  edge e = EDGE_SUCC (new_merge_bb, 0);
640169689Skan  basic_block update_bb = e->dest;
641169689Skan  edge new_exit_e;
642169689Skan  tree orig_def, orig_def_new_name;
643169689Skan  tree new_name, new_name2;
644169689Skan  tree arg;
645169689Skan
646169689Skan  /* Create new bb between loop and new_merge_bb.  */
647169689Skan  *new_exit_bb = split_edge (loop->single_exit);
648169689Skan  add_bb_to_loop (*new_exit_bb, loop->outer);
649169689Skan
650169689Skan  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
651169689Skan
652169689Skan  for (update_phi = phi_nodes (update_bb); update_phi;
653169689Skan       update_phi = PHI_CHAIN (update_phi))
654169689Skan    {
655169689Skan      orig_phi = update_phi;
656169689Skan      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
657169689Skan      /* This loop-closed-phi actually doesn't represent a use
658169689Skan         out of the loop - the phi arg is a constant.  */
659169689Skan      if (TREE_CODE (orig_def) != SSA_NAME)
660169689Skan        continue;
661169689Skan      orig_def_new_name = get_current_def (orig_def);
662169689Skan      arg = NULL_TREE;
663169689Skan
664169689Skan      /** 1. Handle new-merge-point phis  **/
665169689Skan
666169689Skan      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
667169689Skan      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
668169689Skan                                 new_merge_bb);
669169689Skan
670169689Skan      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
671169689Skan            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
672169689Skan      new_name = orig_def;
673169689Skan      new_name2 = NULL_TREE;
674169689Skan      if (orig_def_new_name)
675169689Skan        {
676169689Skan          new_name = orig_def_new_name;
677169689Skan	  /* Some variables have both loop-entry-phis and loop-exit-phis.
678169689Skan	     Such variables were given yet newer names by phis placed in
679169689Skan	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
680169689Skan	     new_name2 = get_current_def (get_current_def (orig_name)).  */
681169689Skan          new_name2 = get_current_def (new_name);
682169689Skan        }
683169689Skan
684169689Skan      if (is_new_loop)
685169689Skan        {
686169689Skan          guard_arg = orig_def;
687169689Skan          loop_arg = new_name;
688169689Skan        }
689169689Skan      else
690169689Skan        {
691169689Skan          guard_arg = new_name;
692169689Skan          loop_arg = orig_def;
693169689Skan        }
694169689Skan      if (new_name2)
695169689Skan        guard_arg = new_name2;
696169689Skan
697169689Skan      add_phi_arg (new_phi, loop_arg, new_exit_e);
698169689Skan      add_phi_arg (new_phi, guard_arg, guard_edge);
699169689Skan
700169689Skan      /* 1.3. Update phi in successor block.  */
701169689Skan      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
702169689Skan      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
703169689Skan      update_phi2 = new_phi;
704169689Skan
705169689Skan
706169689Skan      /** 2. Handle loop-closed-ssa-form phis  **/
707169689Skan
708169689Skan      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
709169689Skan      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
710169689Skan                                 *new_exit_bb);
711169689Skan
712169689Skan      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
713169689Skan      add_phi_arg (new_phi, loop_arg, loop->single_exit);
714169689Skan
715169689Skan      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
716169689Skan      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
717169689Skan      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
718169689Skan
719169689Skan
720169689Skan      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
721169689Skan
722169689Skan      /* 3.1. Find the relevant names that need an exit-phi in
723169689Skan	 GUARD_BB, i.e. names for which
724169689Skan	 slpeel_update_phi_nodes_for_guard1 had not already created a
725169689Skan	 phi node. This is the case for names that are used outside
726169689Skan	 the loop (and therefore need an exit phi) but are not updated
727169689Skan	 across loop iterations (and therefore don't have a
728169689Skan	 loop-header-phi).
729169689Skan
730169689Skan	 slpeel_update_phi_nodes_for_guard1 is responsible for
731169689Skan	 creating loop-exit phis in GUARD_BB for names that have a
732169689Skan	 loop-header-phi.  When such a phi is created we also record
733169689Skan	 the new name in its current definition.  If this new name
734169689Skan	 exists, then guard_arg was set to this new name (see 1.2
735169689Skan	 above).  Therefore, if guard_arg is not this new name, this
736169689Skan	 is an indication that an exit-phi in GUARD_BB was not yet
737169689Skan	 created, so we take care of it here.  */
738169689Skan      if (guard_arg == new_name2)
739169689Skan	continue;
740169689Skan      arg = guard_arg;
741169689Skan
742169689Skan      /* 3.2. Generate new phi node in GUARD_BB:  */
743169689Skan      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
744169689Skan                                 guard_edge->src);
745169689Skan
746169689Skan      /* 3.3. GUARD_BB has one incoming edge:  */
747169689Skan      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
748169689Skan      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
749169689Skan
750169689Skan      /* 3.4. Update phi in successor of GUARD_BB:  */
751169689Skan      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
752169689Skan                                                                == guard_arg);
753169689Skan      SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
754169689Skan    }
755169689Skan
756169689Skan  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
757169689Skan}
758169689Skan
759169689Skan
760169689Skan/* Make the LOOP iterate NITERS times. This is done by adding a new IV
761169689Skan   that starts at zero, increases by one and its limit is NITERS.
762169689Skan
763169689Skan   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
764169689Skan
765169689Skanvoid
766169689Skanslpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
767169689Skan{
768169689Skan  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
769169689Skan  tree orig_cond;
770169689Skan  edge exit_edge = loop->single_exit;
771169689Skan  block_stmt_iterator loop_cond_bsi;
772169689Skan  block_stmt_iterator incr_bsi;
773169689Skan  bool insert_after;
774169689Skan  tree begin_label = tree_block_label (loop->latch);
775169689Skan  tree exit_label = tree_block_label (loop->single_exit->dest);
776169689Skan  tree init = build_int_cst (TREE_TYPE (niters), 0);
777169689Skan  tree step = build_int_cst (TREE_TYPE (niters), 1);
778169689Skan  tree then_label;
779169689Skan  tree else_label;
780169689Skan  LOC loop_loc;
781169689Skan
782169689Skan  orig_cond = get_loop_exit_condition (loop);
783169689Skan  gcc_assert (orig_cond);
784169689Skan  loop_cond_bsi = bsi_for_stmt (orig_cond);
785169689Skan
786169689Skan  standard_iv_increment_position (loop, &incr_bsi, &insert_after);
787169689Skan  create_iv (init, step, NULL_TREE, loop,
788169689Skan             &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
789169689Skan
790169689Skan  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
791169689Skan    {
792169689Skan      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
793169689Skan      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
794169689Skan      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
795169689Skan    }
796169689Skan  else /* 'then' edge loops back.  */
797169689Skan    {
798169689Skan      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
799169689Skan      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
800169689Skan      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
801169689Skan    }
802169689Skan
803169689Skan  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
804169689Skan		     then_label, else_label);
805169689Skan  bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
806169689Skan
807169689Skan  /* Remove old loop exit test:  */
808169689Skan  bsi_remove (&loop_cond_bsi, true);
809169689Skan
810169689Skan  loop_loc = find_loop_location (loop);
811169689Skan  if (dump_file && (dump_flags & TDF_DETAILS))
812169689Skan    {
813169689Skan      if (loop_loc != UNKNOWN_LOC)
814169689Skan        fprintf (dump_file, "\nloop at %s:%d: ",
815169689Skan                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
816169689Skan      print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
817169689Skan    }
818169689Skan
819169689Skan  loop->nb_iterations = niters;
820169689Skan}
821169689Skan
822169689Skan
823169689Skan/* Given LOOP this function generates a new copy of it and puts it
824169689Skan   on E which is either the entry or exit of LOOP.  */
825169689Skan
826169689Skanstatic struct loop *
827169689Skanslpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
828169689Skan					edge e)
829169689Skan{
830169689Skan  struct loop *new_loop;
831169689Skan  basic_block *new_bbs, *bbs;
832169689Skan  bool at_exit;
833169689Skan  bool was_imm_dom;
834169689Skan  basic_block exit_dest;
835169689Skan  tree phi, phi_arg;
836169689Skan
837169689Skan  at_exit = (e == loop->single_exit);
838169689Skan  if (!at_exit && e != loop_preheader_edge (loop))
839169689Skan    return NULL;
840169689Skan
841169689Skan  bbs = get_loop_body (loop);
842169689Skan
843169689Skan  /* Check whether duplication is possible.  */
844169689Skan  if (!can_copy_bbs_p (bbs, loop->num_nodes))
845169689Skan    {
846169689Skan      free (bbs);
847169689Skan      return NULL;
848169689Skan    }
849169689Skan
850169689Skan  /* Generate new loop structure.  */
851169689Skan  new_loop = duplicate_loop (loops, loop, loop->outer);
852169689Skan  if (!new_loop)
853169689Skan    {
854169689Skan      free (bbs);
855169689Skan      return NULL;
856169689Skan    }
857169689Skan
858169689Skan  exit_dest = loop->single_exit->dest;
859169689Skan  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
860169689Skan					  exit_dest) == loop->header ?
861169689Skan		 true : false);
862169689Skan
863169689Skan  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
864169689Skan
865169689Skan  copy_bbs (bbs, loop->num_nodes, new_bbs,
866169689Skan	    &loop->single_exit, 1, &new_loop->single_exit, NULL,
867169689Skan	    e->src);
868169689Skan
869169689Skan  /* Duplicating phi args at exit bbs as coming
870169689Skan     also from exit of duplicated loop.  */
871169689Skan  for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
872169689Skan    {
873169689Skan      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
874169689Skan      if (phi_arg)
875169689Skan	{
876169689Skan	  edge new_loop_exit_edge;
877169689Skan
878169689Skan	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
879169689Skan	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
880169689Skan	  else
881169689Skan	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
882169689Skan
883169689Skan	  add_phi_arg (phi, phi_arg, new_loop_exit_edge);
884169689Skan	}
885169689Skan    }
886169689Skan
887169689Skan  if (at_exit) /* Add the loop copy at exit.  */
888169689Skan    {
889169689Skan      redirect_edge_and_branch_force (e, new_loop->header);
890169689Skan      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
891169689Skan      if (was_imm_dom)
892169689Skan	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
893169689Skan    }
894169689Skan  else /* Add the copy at entry.  */
895169689Skan    {
896169689Skan      edge new_exit_e;
897169689Skan      edge entry_e = loop_preheader_edge (loop);
898169689Skan      basic_block preheader = entry_e->src;
899169689Skan
900169689Skan      if (!flow_bb_inside_loop_p (new_loop,
901169689Skan				  EDGE_SUCC (new_loop->header, 0)->dest))
902169689Skan        new_exit_e = EDGE_SUCC (new_loop->header, 0);
903169689Skan      else
904169689Skan	new_exit_e = EDGE_SUCC (new_loop->header, 1);
905169689Skan
906169689Skan      redirect_edge_and_branch_force (new_exit_e, loop->header);
907169689Skan      set_immediate_dominator (CDI_DOMINATORS, loop->header,
908169689Skan			       new_exit_e->src);
909169689Skan
910169689Skan      /* We have to add phi args to the loop->header here as coming
911169689Skan	 from new_exit_e edge.  */
912169689Skan      for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
913169689Skan	{
914169689Skan	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
915169689Skan	  if (phi_arg)
916169689Skan	    add_phi_arg (phi, phi_arg, new_exit_e);
917169689Skan	}
918169689Skan
919169689Skan      redirect_edge_and_branch_force (entry_e, new_loop->header);
920169689Skan      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
921169689Skan    }
922169689Skan
923169689Skan  free (new_bbs);
924169689Skan  free (bbs);
925169689Skan
926169689Skan  return new_loop;
927169689Skan}
928169689Skan
929169689Skan
930169689Skan/* Given the condition statement COND, put it as the last statement
931169689Skan   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
932169689Skan   Assumes that this is the single exit of the guarded loop.
933169689Skan   Returns the skip edge.  */
934169689Skan
935169689Skanstatic edge
936169689Skanslpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
937169689Skan		        basic_block dom_bb)
938169689Skan{
939169689Skan  block_stmt_iterator bsi;
940169689Skan  edge new_e, enter_e;
941169689Skan  tree cond_stmt, then_label, else_label;
942169689Skan
943169689Skan  enter_e = EDGE_SUCC (guard_bb, 0);
944169689Skan  enter_e->flags &= ~EDGE_FALLTHRU;
945169689Skan  enter_e->flags |= EDGE_FALSE_VALUE;
946169689Skan  bsi = bsi_last (guard_bb);
947169689Skan
948169689Skan  then_label = build1 (GOTO_EXPR, void_type_node,
949169689Skan                       tree_block_label (exit_bb));
950169689Skan  else_label = build1 (GOTO_EXPR, void_type_node,
951169689Skan                       tree_block_label (enter_e->dest));
952169689Skan  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
953169689Skan   		     then_label, else_label);
954169689Skan  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
955169689Skan  /* Add new edge to connect guard block to the merge/loop-exit block.  */
956169689Skan  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
957169689Skan  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
958169689Skan  return new_e;
959169689Skan}
960169689Skan
961169689Skan
962169689Skan/* This function verifies that the following restrictions apply to LOOP:
963169689Skan   (1) it is innermost
964169689Skan   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
965169689Skan   (3) it is single entry, single exit
966169689Skan   (4) its exit condition is the last stmt in the header
967169689Skan   (5) E is the entry/exit edge of LOOP.
968169689Skan */
969169689Skan
970169689Skanbool
971169689Skanslpeel_can_duplicate_loop_p (struct loop *loop, edge e)
972169689Skan{
973169689Skan  edge exit_e = loop->single_exit;
974169689Skan  edge entry_e = loop_preheader_edge (loop);
975169689Skan  tree orig_cond = get_loop_exit_condition (loop);
976169689Skan  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
977169689Skan
978169689Skan  if (need_ssa_update_p ())
979169689Skan    return false;
980169689Skan
981169689Skan  if (loop->inner
982169689Skan      /* All loops have an outer scope; the only case loop->outer is NULL is for
983169689Skan         the function itself.  */
984169689Skan      || !loop->outer
985169689Skan      || loop->num_nodes != 2
986169689Skan      || !empty_block_p (loop->latch)
987169689Skan      || !loop->single_exit
988169689Skan      /* Verify that new loop exit condition can be trivially modified.  */
989169689Skan      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
990169689Skan      || (e != exit_e && e != entry_e))
991169689Skan    return false;
992169689Skan
993169689Skan  return true;
994169689Skan}
995169689Skan
996169689Skan#ifdef ENABLE_CHECKING
997169689Skanvoid
998169689Skanslpeel_verify_cfg_after_peeling (struct loop *first_loop,
999169689Skan                                 struct loop *second_loop)
1000169689Skan{
1001169689Skan  basic_block loop1_exit_bb = first_loop->single_exit->dest;
1002169689Skan  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003169689Skan  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004169689Skan
1005169689Skan  /* A guard that controls whether the second_loop is to be executed or skipped
1006169689Skan     is placed in first_loop->exit.  first_loopt->exit therefore has two
1007169689Skan     successors - one is the preheader of second_loop, and the other is a bb
1008169689Skan     after second_loop.
1009169689Skan   */
1010169689Skan  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011169689Skan
1012169689Skan  /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1013169689Skan        of second_loop.  */
1014169689Skan
1015169689Skan  /* The preheader of new_loop is expected to have two predecessors:
1016169689Skan     first_loop->exit and the block that precedes first_loop.  */
1017169689Skan
1018169689Skan  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019169689Skan              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020169689Skan                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021169689Skan               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022169689Skan                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023169689Skan
1024169689Skan  /* Verify that the other successor of first_loopt->exit is after the
1025169689Skan     second_loop.  */
1026169689Skan  /* TODO */
1027169689Skan}
1028169689Skan#endif
1029169689Skan
1030169689Skan/* Function slpeel_tree_peel_loop_to_edge.
1031169689Skan
1032169689Skan   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1033169689Skan   that is placed on the entry (exit) edge E of LOOP. After this transformation
1034169689Skan   we have two loops one after the other - first-loop iterates FIRST_NITERS
1035169689Skan   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1036169689Skan
1037169689Skan   Input:
1038169689Skan   - LOOP: the loop to be peeled.
1039169689Skan   - E: the exit or entry edge of LOOP.
1040169689Skan        If it is the entry edge, we peel the first iterations of LOOP. In this
1041169689Skan        case first-loop is LOOP, and second-loop is the newly created loop.
1042169689Skan        If it is the exit edge, we peel the last iterations of LOOP. In this
1043169689Skan        case, first-loop is the newly created loop, and second-loop is LOOP.
1044169689Skan   - NITERS: the number of iterations that LOOP iterates.
1045169689Skan   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1046169689Skan   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1047169689Skan        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1048169689Skan        is false, the caller of this function may want to take care of this
1049169689Skan        (this can be useful if we don't want new stmts added to first-loop).
1050169689Skan
1051169689Skan   Output:
1052169689Skan   The function returns a pointer to the new loop-copy, or NULL if it failed
1053169689Skan   to perform the transformation.
1054169689Skan
1055169689Skan   The function generates two if-then-else guards: one before the first loop,
1056169689Skan   and the other before the second loop:
1057169689Skan   The first guard is:
1058169689Skan     if (FIRST_NITERS == 0) then skip the first loop,
1059169689Skan     and go directly to the second loop.
1060169689Skan   The second guard is:
1061169689Skan     if (FIRST_NITERS == NITERS) then skip the second loop.
1062169689Skan
1063169689Skan   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1064169689Skan   FORNOW the resulting code will not be in loop-closed-ssa form.
1065169689Skan*/
1066169689Skan
1067169689Skanstruct loop*
1068169689Skanslpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1069169689Skan			       edge e, tree first_niters,
1070169689Skan			       tree niters, bool update_first_loop_count)
1071169689Skan{
1072169689Skan  struct loop *new_loop = NULL, *first_loop, *second_loop;
1073169689Skan  edge skip_e;
1074169689Skan  tree pre_condition;
1075169689Skan  bitmap definitions;
1076169689Skan  basic_block bb_before_second_loop, bb_after_second_loop;
1077169689Skan  basic_block bb_before_first_loop;
1078169689Skan  basic_block bb_between_loops;
1079169689Skan  basic_block new_exit_bb;
1080169689Skan  edge exit_e = loop->single_exit;
1081169689Skan  LOC loop_loc;
1082169689Skan
1083169689Skan  if (!slpeel_can_duplicate_loop_p (loop, e))
1084169689Skan    return NULL;
1085169689Skan
1086169689Skan  /* We have to initialize cfg_hooks. Then, when calling
1087169689Skan   cfg_hooks->split_edge, the function tree_split_edge
1088169689Skan   is actually called and, when calling cfg_hooks->duplicate_block,
1089169689Skan   the function tree_duplicate_bb is called.  */
1090169689Skan  tree_register_cfg_hooks ();
1091169689Skan
1092169689Skan
1093169689Skan  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1094169689Skan        Resulting CFG would be:
1095169689Skan
1096169689Skan        first_loop:
1097169689Skan        do {
1098169689Skan        } while ...
1099169689Skan
1100169689Skan        second_loop:
1101169689Skan        do {
1102169689Skan        } while ...
1103169689Skan
1104169689Skan        orig_exit_bb:
1105169689Skan   */
1106169689Skan
1107169689Skan  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1108169689Skan    {
1109169689Skan      loop_loc = find_loop_location (loop);
1110169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1111169689Skan        {
1112169689Skan          if (loop_loc != UNKNOWN_LOC)
1113169689Skan            fprintf (dump_file, "\n%s:%d: note: ",
1114169689Skan                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1115169689Skan          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1116169689Skan        }
1117169689Skan      return NULL;
1118169689Skan    }
1119169689Skan
1120169689Skan  if (e == exit_e)
1121169689Skan    {
1122169689Skan      /* NEW_LOOP was placed after LOOP.  */
1123169689Skan      first_loop = loop;
1124169689Skan      second_loop = new_loop;
1125169689Skan    }
1126169689Skan  else
1127169689Skan    {
1128169689Skan      /* NEW_LOOP was placed before LOOP.  */
1129169689Skan      first_loop = new_loop;
1130169689Skan      second_loop = loop;
1131169689Skan    }
1132169689Skan
1133169689Skan  definitions = ssa_names_to_replace ();
1134169689Skan  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1135169689Skan  rename_variables_in_loop (new_loop);
1136169689Skan
1137169689Skan
1138169689Skan  /* 2. Add the guard that controls whether the first loop is executed.
1139169689Skan        Resulting CFG would be:
1140169689Skan
1141169689Skan        bb_before_first_loop:
1142169689Skan        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1143169689Skan                               GOTO first-loop
1144169689Skan
1145169689Skan        first_loop:
1146169689Skan        do {
1147169689Skan        } while ...
1148169689Skan
1149169689Skan        bb_before_second_loop:
1150169689Skan
1151169689Skan        second_loop:
1152169689Skan        do {
1153169689Skan        } while ...
1154169689Skan
1155169689Skan        orig_exit_bb:
1156169689Skan   */
1157169689Skan
1158169689Skan  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1159169689Skan  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1160169689Skan  bb_before_second_loop = split_edge (first_loop->single_exit);
1161169689Skan  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1162169689Skan
1163169689Skan  pre_condition =
1164169689Skan    fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1165169689Skan                 build_int_cst (TREE_TYPE (first_niters), 0));
1166169689Skan  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1167169689Skan                                  bb_before_second_loop, bb_before_first_loop);
1168169689Skan  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1169169689Skan				      first_loop == new_loop,
1170169689Skan				      &new_exit_bb, &definitions);
1171169689Skan
1172169689Skan
1173169689Skan  /* 3. Add the guard that controls whether the second loop is executed.
1174169689Skan        Resulting CFG would be:
1175169689Skan
1176169689Skan        bb_before_first_loop:
1177169689Skan        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1178169689Skan                               GOTO first-loop
1179169689Skan
1180169689Skan        first_loop:
1181169689Skan        do {
1182169689Skan        } while ...
1183169689Skan
1184169689Skan        bb_between_loops:
1185169689Skan        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1186169689Skan                                    GOTO bb_before_second_loop
1187169689Skan
1188169689Skan        bb_before_second_loop:
1189169689Skan
1190169689Skan        second_loop:
1191169689Skan        do {
1192169689Skan        } while ...
1193169689Skan
1194169689Skan        bb_after_second_loop:
1195169689Skan
1196169689Skan        orig_exit_bb:
1197169689Skan   */
1198169689Skan
1199169689Skan  bb_between_loops = new_exit_bb;
1200169689Skan  bb_after_second_loop = split_edge (second_loop->single_exit);
1201169689Skan  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1202169689Skan
1203169689Skan  pre_condition =
1204169689Skan	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1205169689Skan  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1206169689Skan                                  bb_after_second_loop, bb_before_first_loop);
1207169689Skan  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1208169689Skan                                     second_loop == new_loop, &new_exit_bb);
1209169689Skan
1210169689Skan  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1211169689Skan   */
1212169689Skan  if (update_first_loop_count)
1213169689Skan    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1214169689Skan
1215169689Skan  BITMAP_FREE (definitions);
1216169689Skan  delete_update_ssa ();
1217169689Skan
1218169689Skan  return new_loop;
1219169689Skan}
1220169689Skan
1221169689Skan/* Function vect_get_loop_location.
1222169689Skan
1223169689Skan   Extract the location of the loop in the source code.
1224169689Skan   If the loop is not well formed for vectorization, an estimated
1225169689Skan   location is calculated.
1226169689Skan   Return the loop location if succeed and NULL if not.  */
1227169689Skan
1228169689SkanLOC
1229169689Skanfind_loop_location (struct loop *loop)
1230169689Skan{
1231169689Skan  tree node = NULL_TREE;
1232169689Skan  basic_block bb;
1233169689Skan  block_stmt_iterator si;
1234169689Skan
1235169689Skan  if (!loop)
1236169689Skan    return UNKNOWN_LOC;
1237169689Skan
1238169689Skan  node = get_loop_exit_condition (loop);
1239169689Skan
1240169689Skan  if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241169689Skan      && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242169689Skan    return EXPR_LOC (node);
1243169689Skan
1244169689Skan  /* If we got here the loop is probably not "well formed",
1245169689Skan     try to estimate the loop location */
1246169689Skan
1247169689Skan  if (!loop->header)
1248169689Skan    return UNKNOWN_LOC;
1249169689Skan
1250169689Skan  bb = loop->header;
1251169689Skan
1252169689Skan  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253169689Skan    {
1254169689Skan      node = bsi_stmt (si);
1255169689Skan      if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256169689Skan        return EXPR_LOC (node);
1257169689Skan    }
1258169689Skan
1259169689Skan  return UNKNOWN_LOC;
1260169689Skan}
1261169689Skan
1262169689Skan
1263169689Skan/*************************************************************************
1264169689Skan  Vectorization Debug Information.
1265169689Skan *************************************************************************/
1266169689Skan
1267169689Skan/* Function vect_set_verbosity_level.
1268169689Skan
1269169689Skan   Called from toplev.c upon detection of the
1270169689Skan   -ftree-vectorizer-verbose=N option.  */
1271169689Skan
1272169689Skanvoid
1273169689Skanvect_set_verbosity_level (const char *val)
1274169689Skan{
1275169689Skan   unsigned int vl;
1276169689Skan
1277169689Skan   vl = atoi (val);
1278169689Skan   if (vl < MAX_VERBOSITY_LEVEL)
1279169689Skan     vect_verbosity_level = vl;
1280169689Skan   else
1281169689Skan     vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282169689Skan}
1283169689Skan
1284169689Skan
1285169689Skan/* Function vect_set_dump_settings.
1286169689Skan
1287169689Skan   Fix the verbosity level of the vectorizer if the
1288169689Skan   requested level was not set explicitly using the flag
1289169689Skan   -ftree-vectorizer-verbose=N.
1290169689Skan   Decide where to print the debugging information (dump_file/stderr).
1291169689Skan   If the user defined the verbosity level, but there is no dump file,
1292169689Skan   print to stderr, otherwise print to the dump file.  */
1293169689Skan
1294169689Skanstatic void
1295169689Skanvect_set_dump_settings (void)
1296169689Skan{
1297169689Skan  vect_dump = dump_file;
1298169689Skan
1299169689Skan  /* Check if the verbosity level was defined by the user:  */
1300169689Skan  if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301169689Skan    {
1302169689Skan      /* If there is no dump file, print to stderr.  */
1303169689Skan      if (!dump_file)
1304169689Skan        vect_dump = stderr;
1305169689Skan      return;
1306169689Skan    }
1307169689Skan
1308169689Skan  /* User didn't specify verbosity level:  */
1309169689Skan  if (dump_file && (dump_flags & TDF_DETAILS))
1310169689Skan    vect_verbosity_level = REPORT_DETAILS;
1311169689Skan  else if (dump_file && (dump_flags & TDF_STATS))
1312169689Skan    vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313169689Skan  else
1314169689Skan    vect_verbosity_level = REPORT_NONE;
1315169689Skan
1316169689Skan  gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317169689Skan}
1318169689Skan
1319169689Skan
1320169689Skan/* Function debug_loop_details.
1321169689Skan
1322169689Skan   For vectorization debug dumps.  */
1323169689Skan
1324169689Skanbool
1325169689Skanvect_print_dump_info (enum verbosity_levels vl)
1326169689Skan{
1327169689Skan  if (vl > vect_verbosity_level)
1328169689Skan    return false;
1329169689Skan
1330169689Skan  if (!current_function_decl || !vect_dump)
1331169689Skan    return false;
1332169689Skan
1333169689Skan  if (vect_loop_location == UNKNOWN_LOC)
1334169689Skan    fprintf (vect_dump, "\n%s:%d: note: ",
1335169689Skan		 DECL_SOURCE_FILE (current_function_decl),
1336169689Skan		 DECL_SOURCE_LINE (current_function_decl));
1337169689Skan  else
1338169689Skan    fprintf (vect_dump, "\n%s:%d: note: ",
1339169689Skan	     LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1340169689Skan
1341169689Skan  return true;
1342169689Skan}
1343169689Skan
1344169689Skan
1345169689Skan/*************************************************************************
1346169689Skan  Vectorization Utilities.
1347169689Skan *************************************************************************/
1348169689Skan
1349169689Skan/* Function new_stmt_vec_info.
1350169689Skan
1351169689Skan   Create and initialize a new stmt_vec_info struct for STMT.  */
1352169689Skan
1353169689Skanstmt_vec_info
1354169689Skannew_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1355169689Skan{
1356169689Skan  stmt_vec_info res;
1357169689Skan  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1358169689Skan
1359169689Skan  STMT_VINFO_TYPE (res) = undef_vec_info_type;
1360169689Skan  STMT_VINFO_STMT (res) = stmt;
1361169689Skan  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1362169689Skan  STMT_VINFO_RELEVANT_P (res) = 0;
1363169689Skan  STMT_VINFO_LIVE_P (res) = 0;
1364169689Skan  STMT_VINFO_VECTYPE (res) = NULL;
1365169689Skan  STMT_VINFO_VEC_STMT (res) = NULL;
1366169689Skan  STMT_VINFO_IN_PATTERN_P (res) = false;
1367169689Skan  STMT_VINFO_RELATED_STMT (res) = NULL;
1368169689Skan  STMT_VINFO_DATA_REF (res) = NULL;
1369169689Skan  if (TREE_CODE (stmt) == PHI_NODE)
1370169689Skan    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1371169689Skan  else
1372169689Skan    STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1373169689Skan  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1374169689Skan
1375169689Skan  return res;
1376169689Skan}
1377169689Skan
1378169689Skan
1379169689Skan/* Function new_loop_vec_info.
1380169689Skan
1381169689Skan   Create and initialize a new loop_vec_info struct for LOOP, as well as
1382169689Skan   stmt_vec_info structs for all the stmts in LOOP.  */
1383169689Skan
1384169689Skanloop_vec_info
1385169689Skannew_loop_vec_info (struct loop *loop)
1386169689Skan{
1387169689Skan  loop_vec_info res;
1388169689Skan  basic_block *bbs;
1389169689Skan  block_stmt_iterator si;
1390169689Skan  unsigned int i;
1391169689Skan
1392169689Skan  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1393169689Skan
1394169689Skan  bbs = get_loop_body (loop);
1395169689Skan
1396169689Skan  /* Create stmt_info for all stmts in the loop.  */
1397169689Skan  for (i = 0; i < loop->num_nodes; i++)
1398169689Skan    {
1399169689Skan      basic_block bb = bbs[i];
1400169689Skan      tree phi;
1401169689Skan
1402169689Skan      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1403169689Skan        {
1404169689Skan          stmt_ann_t ann = get_stmt_ann (phi);
1405169689Skan          set_stmt_info (ann, new_stmt_vec_info (phi, res));
1406169689Skan        }
1407169689Skan
1408169689Skan      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1409169689Skan	{
1410169689Skan	  tree stmt = bsi_stmt (si);
1411169689Skan	  stmt_ann_t ann;
1412169689Skan
1413169689Skan	  ann = stmt_ann (stmt);
1414169689Skan	  set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1415169689Skan	}
1416169689Skan    }
1417169689Skan
1418169689Skan  LOOP_VINFO_LOOP (res) = loop;
1419169689Skan  LOOP_VINFO_BBS (res) = bbs;
1420169689Skan  LOOP_VINFO_EXIT_COND (res) = NULL;
1421169689Skan  LOOP_VINFO_NITERS (res) = NULL;
1422169689Skan  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1423169689Skan  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1424169689Skan  LOOP_VINFO_VECT_FACTOR (res) = 0;
1425169689Skan  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1426169689Skan  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1427169689Skan  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1428169689Skan  LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1429169689Skan    = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1430169689Skan
1431169689Skan  return res;
1432169689Skan}
1433169689Skan
1434169689Skan
1435169689Skan/* Function destroy_loop_vec_info.
1436169689Skan
1437169689Skan   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1438169689Skan   stmts in the loop.  */
1439169689Skan
1440169689Skanvoid
1441169689Skandestroy_loop_vec_info (loop_vec_info loop_vinfo)
1442169689Skan{
1443169689Skan  struct loop *loop;
1444169689Skan  basic_block *bbs;
1445169689Skan  int nbbs;
1446169689Skan  block_stmt_iterator si;
1447169689Skan  int j;
1448169689Skan
1449169689Skan  if (!loop_vinfo)
1450169689Skan    return;
1451169689Skan
1452169689Skan  loop = LOOP_VINFO_LOOP (loop_vinfo);
1453169689Skan
1454169689Skan  bbs = LOOP_VINFO_BBS (loop_vinfo);
1455169689Skan  nbbs = loop->num_nodes;
1456169689Skan
1457169689Skan  for (j = 0; j < nbbs; j++)
1458169689Skan    {
1459169689Skan      basic_block bb = bbs[j];
1460169689Skan      tree phi;
1461169689Skan      stmt_vec_info stmt_info;
1462169689Skan
1463169689Skan      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1464169689Skan        {
1465169689Skan          stmt_ann_t ann = stmt_ann (phi);
1466169689Skan
1467169689Skan          stmt_info = vinfo_for_stmt (phi);
1468169689Skan          free (stmt_info);
1469169689Skan          set_stmt_info (ann, NULL);
1470169689Skan        }
1471169689Skan
1472169689Skan      for (si = bsi_start (bb); !bsi_end_p (si); )
1473169689Skan	{
1474169689Skan	  tree stmt = bsi_stmt (si);
1475169689Skan	  stmt_ann_t ann = stmt_ann (stmt);
1476169689Skan	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1477169689Skan
1478169689Skan	  if (stmt_info)
1479169689Skan	    {
1480169689Skan	      /* Check if this is a "pattern stmt" (introduced by the
1481169689Skan		 vectorizer during the pattern recognition pass).  */
1482169689Skan	      bool remove_stmt_p = false;
1483169689Skan	      tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1484169689Skan	      if (orig_stmt)
1485169689Skan		{
1486169689Skan		  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1487169689Skan		  if (orig_stmt_info
1488169689Skan		      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1489169689Skan		    remove_stmt_p = true;
1490169689Skan		}
1491169689Skan
1492169689Skan	      /* Free stmt_vec_info.  */
1493169689Skan	      VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1494169689Skan	      free (stmt_info);
1495169689Skan	      set_stmt_info (ann, NULL);
1496169689Skan
1497169689Skan	      /* Remove dead "pattern stmts".  */
1498169689Skan	      if (remove_stmt_p)
1499169689Skan	        bsi_remove (&si, true);
1500169689Skan	    }
1501169689Skan	  bsi_next (&si);
1502169689Skan	}
1503169689Skan    }
1504169689Skan
1505169689Skan  free (LOOP_VINFO_BBS (loop_vinfo));
1506169689Skan  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1507169689Skan  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1508169689Skan  VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1509169689Skan
1510169689Skan  free (loop_vinfo);
1511169689Skan}
1512169689Skan
1513169689Skan
1514169689Skan/* Function vect_force_dr_alignment_p.
1515169689Skan
1516169689Skan   Returns whether the alignment of a DECL can be forced to be aligned
1517169689Skan   on ALIGNMENT bit boundary.  */
1518169689Skan
1519169689Skanbool
1520169689Skanvect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1521169689Skan{
1522169689Skan  if (TREE_CODE (decl) != VAR_DECL)
1523169689Skan    return false;
1524169689Skan
1525169689Skan  if (DECL_EXTERNAL (decl))
1526169689Skan    return false;
1527169689Skan
1528169689Skan  if (TREE_ASM_WRITTEN (decl))
1529169689Skan    return false;
1530169689Skan
1531169689Skan  if (TREE_STATIC (decl))
1532169689Skan    return (alignment <= MAX_OFILE_ALIGNMENT);
1533169689Skan  else
1534169689Skan    /* This is not 100% correct.  The absolute correct stack alignment
1535169689Skan       is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1536169689Skan       PREFERRED_STACK_BOUNDARY is honored by all translation units.
1537169689Skan       However, until someone implements forced stack alignment, SSE
1538169689Skan       isn't really usable without this.  */
1539169689Skan    return (alignment <= PREFERRED_STACK_BOUNDARY);
1540169689Skan}
1541169689Skan
1542169689Skan
1543169689Skan/* Function get_vectype_for_scalar_type.
1544169689Skan
1545169689Skan   Returns the vector type corresponding to SCALAR_TYPE as supported
1546169689Skan   by the target.  */
1547169689Skan
1548169689Skantree
1549169689Skanget_vectype_for_scalar_type (tree scalar_type)
1550169689Skan{
1551169689Skan  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1552169689Skan  int nbytes = GET_MODE_SIZE (inner_mode);
1553169689Skan  int nunits;
1554169689Skan  tree vectype;
1555169689Skan
1556169689Skan  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1557169689Skan    return NULL_TREE;
1558169689Skan
1559169689Skan  /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1560169689Skan     is expected.  */
1561169689Skan  nunits = UNITS_PER_SIMD_WORD / nbytes;
1562169689Skan
1563169689Skan  vectype = build_vector_type (scalar_type, nunits);
1564169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1565169689Skan    {
1566169689Skan      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1567169689Skan      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1568169689Skan    }
1569169689Skan
1570169689Skan  if (!vectype)
1571169689Skan    return NULL_TREE;
1572169689Skan
1573169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1574169689Skan    {
1575169689Skan      fprintf (vect_dump, "vectype: ");
1576169689Skan      print_generic_expr (vect_dump, vectype, TDF_SLIM);
1577169689Skan    }
1578169689Skan
1579169689Skan  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1580169689Skan      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1581169689Skan    {
1582169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1583169689Skan        fprintf (vect_dump, "mode not supported by target.");
1584169689Skan      return NULL_TREE;
1585169689Skan    }
1586169689Skan
1587169689Skan  return vectype;
1588169689Skan}
1589169689Skan
1590169689Skan
1591169689Skan/* Function vect_supportable_dr_alignment
1592169689Skan
1593169689Skan   Return whether the data reference DR is supported with respect to its
1594169689Skan   alignment.  */
1595169689Skan
1596169689Skanenum dr_alignment_support
1597169689Skanvect_supportable_dr_alignment (struct data_reference *dr)
1598169689Skan{
1599169689Skan  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1600169689Skan  enum machine_mode mode = (int) TYPE_MODE (vectype);
1601169689Skan
1602169689Skan  if (aligned_access_p (dr))
1603169689Skan    return dr_aligned;
1604169689Skan
1605169689Skan  /* Possibly unaligned access.  */
1606169689Skan
1607169689Skan  if (DR_IS_READ (dr))
1608169689Skan    {
1609169689Skan      if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1610169689Skan	  && (!targetm.vectorize.builtin_mask_for_load
1611169689Skan	      || targetm.vectorize.builtin_mask_for_load ()))
1612169689Skan	return dr_unaligned_software_pipeline;
1613169689Skan
1614169689Skan      if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1615169689Skan	/* Can't software pipeline the loads, but can at least do them.  */
1616169689Skan	return dr_unaligned_supported;
1617169689Skan    }
1618169689Skan
1619169689Skan  /* Unsupported.  */
1620169689Skan  return dr_unaligned_unsupported;
1621169689Skan}
1622169689Skan
1623169689Skan
1624169689Skan/* Function vect_is_simple_use.
1625169689Skan
1626169689Skan   Input:
1627169689Skan   LOOP - the loop that is being vectorized.
1628169689Skan   OPERAND - operand of a stmt in LOOP.
1629169689Skan   DEF - the defining stmt in case OPERAND is an SSA_NAME.
1630169689Skan
1631169689Skan   Returns whether a stmt with OPERAND can be vectorized.
1632169689Skan   Supportable operands are constants, loop invariants, and operands that are
1633169689Skan   defined by the current iteration of the loop. Unsupportable operands are
1634169689Skan   those that are defined by a previous iteration of the loop (as is the case
1635169689Skan   in reduction/induction computations).  */
1636169689Skan
1637169689Skanbool
1638169689Skanvect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1639169689Skan		    tree *def, enum vect_def_type *dt)
1640169689Skan{
1641169689Skan  basic_block bb;
1642169689Skan  stmt_vec_info stmt_vinfo;
1643169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1644169689Skan
1645169689Skan  *def_stmt = NULL_TREE;
1646169689Skan  *def = NULL_TREE;
1647169689Skan
1648169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1649169689Skan    {
1650169689Skan      fprintf (vect_dump, "vect_is_simple_use: operand ");
1651169689Skan      print_generic_expr (vect_dump, operand, TDF_SLIM);
1652169689Skan    }
1653169689Skan
1654169689Skan  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1655169689Skan    {
1656169689Skan      *dt = vect_constant_def;
1657169689Skan      return true;
1658169689Skan    }
1659169689Skan
1660169689Skan  if (TREE_CODE (operand) != SSA_NAME)
1661169689Skan    {
1662169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1663169689Skan        fprintf (vect_dump, "not ssa-name.");
1664169689Skan      return false;
1665169689Skan    }
1666169689Skan
1667169689Skan  *def_stmt = SSA_NAME_DEF_STMT (operand);
1668169689Skan  if (*def_stmt == NULL_TREE )
1669169689Skan    {
1670169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1671169689Skan        fprintf (vect_dump, "no def_stmt.");
1672169689Skan      return false;
1673169689Skan    }
1674169689Skan
1675169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1676169689Skan    {
1677169689Skan      fprintf (vect_dump, "def_stmt: ");
1678169689Skan      print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1679169689Skan    }
1680169689Skan
1681169689Skan  /* empty stmt is expected only in case of a function argument.
1682169689Skan     (Otherwise - we expect a phi_node or a modify_expr).  */
1683169689Skan  if (IS_EMPTY_STMT (*def_stmt))
1684169689Skan    {
1685169689Skan      tree arg = TREE_OPERAND (*def_stmt, 0);
1686169689Skan      if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1687169689Skan        {
1688169689Skan          *def = operand;
1689169689Skan          *dt = vect_invariant_def;
1690169689Skan          return true;
1691169689Skan        }
1692169689Skan
1693169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1694169689Skan        fprintf (vect_dump, "Unexpected empty stmt.");
1695169689Skan      return false;
1696169689Skan    }
1697169689Skan
1698169689Skan  bb = bb_for_stmt (*def_stmt);
1699169689Skan  if (!flow_bb_inside_loop_p (loop, bb))
1700169689Skan    *dt = vect_invariant_def;
1701169689Skan  else
1702169689Skan    {
1703169689Skan      stmt_vinfo = vinfo_for_stmt (*def_stmt);
1704169689Skan      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1705169689Skan    }
1706169689Skan
1707169689Skan  if (*dt == vect_unknown_def_type)
1708169689Skan    {
1709169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1710169689Skan        fprintf (vect_dump, "Unsupported pattern.");
1711169689Skan      return false;
1712169689Skan    }
1713169689Skan
1714169689Skan  /* stmts inside the loop that have been identified as performing
1715169689Skan     a reduction operation cannot have uses in the loop.  */
1716169689Skan  if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1717169689Skan    {
1718169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1719169689Skan        fprintf (vect_dump, "reduction used in loop.");
1720169689Skan      return false;
1721169689Skan    }
1722169689Skan
1723169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1724169689Skan    fprintf (vect_dump, "type of def: %d.",*dt);
1725169689Skan
1726169689Skan  switch (TREE_CODE (*def_stmt))
1727169689Skan    {
1728169689Skan    case PHI_NODE:
1729169689Skan      *def = PHI_RESULT (*def_stmt);
1730169689Skan      gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1731169689Skan                  || *dt == vect_invariant_def);
1732169689Skan      break;
1733169689Skan
1734169689Skan    case MODIFY_EXPR:
1735169689Skan      *def = TREE_OPERAND (*def_stmt, 0);
1736169689Skan      gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1737169689Skan      break;
1738169689Skan
1739169689Skan    default:
1740169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1741169689Skan        fprintf (vect_dump, "unsupported defining stmt: ");
1742169689Skan      return false;
1743169689Skan    }
1744169689Skan
1745169689Skan  if (*dt == vect_induction_def)
1746169689Skan    {
1747169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1748169689Skan        fprintf (vect_dump, "induction not supported.");
1749169689Skan      return false;
1750169689Skan    }
1751169689Skan
1752169689Skan  return true;
1753169689Skan}
1754169689Skan
1755169689Skan
1756169689Skan/* Function reduction_code_for_scalar_code
1757169689Skan
1758169689Skan   Input:
1759169689Skan   CODE - tree_code of a reduction operations.
1760169689Skan
1761169689Skan   Output:
1762169689Skan   REDUC_CODE - the corresponding tree-code to be used to reduce the
1763169689Skan      vector of partial results into a single scalar result (which
1764169689Skan      will also reside in a vector).
1765169689Skan
1766169689Skan   Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1767169689Skan
1768169689Skanbool
1769169689Skanreduction_code_for_scalar_code (enum tree_code code,
1770169689Skan                                enum tree_code *reduc_code)
1771169689Skan{
1772169689Skan  switch (code)
1773169689Skan  {
1774169689Skan  case MAX_EXPR:
1775169689Skan    *reduc_code = REDUC_MAX_EXPR;
1776169689Skan    return true;
1777169689Skan
1778169689Skan  case MIN_EXPR:
1779169689Skan    *reduc_code = REDUC_MIN_EXPR;
1780169689Skan    return true;
1781169689Skan
1782169689Skan  case PLUS_EXPR:
1783169689Skan    *reduc_code = REDUC_PLUS_EXPR;
1784169689Skan    return true;
1785169689Skan
1786169689Skan  default:
1787169689Skan    return false;
1788169689Skan  }
1789169689Skan}
1790169689Skan
1791169689Skan
1792169689Skan/* Function vect_is_simple_reduction
1793169689Skan
1794169689Skan   Detect a cross-iteration def-use cucle that represents a simple
1795169689Skan   reduction computation. We look for the following pattern:
1796169689Skan
1797169689Skan   loop_header:
1798169689Skan     a1 = phi < a0, a2 >
1799169689Skan     a3 = ...
1800169689Skan     a2 = operation (a3, a1)
1801169689Skan
1802169689Skan   such that:
1803169689Skan   1. operation is commutative and associative and it is safe to
1804169689Skan      change the order of the computation.
1805169689Skan   2. no uses for a2 in the loop (a2 is used out of the loop)
1806169689Skan   3. no uses of a1 in the loop besides the reduction operation.
1807169689Skan
1808169689Skan   Condition 1 is tested here.
1809169689Skan   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1810169689Skan
1811169689Skantree
1812169689Skanvect_is_simple_reduction (struct loop *loop, tree phi)
1813169689Skan{
1814169689Skan  edge latch_e = loop_latch_edge (loop);
1815169689Skan  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1816169689Skan  tree def_stmt, def1, def2;
1817169689Skan  enum tree_code code;
1818169689Skan  int op_type;
1819169689Skan  tree operation, op1, op2;
1820169689Skan  tree type;
1821169689Skan
1822169689Skan  if (TREE_CODE (loop_arg) != SSA_NAME)
1823169689Skan    {
1824169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1825169689Skan        {
1826169689Skan          fprintf (vect_dump, "reduction: not ssa_name: ");
1827169689Skan          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1828169689Skan        }
1829169689Skan      return NULL_TREE;
1830169689Skan    }
1831169689Skan
1832169689Skan  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1833169689Skan  if (!def_stmt)
1834169689Skan    {
1835169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1836169689Skan        fprintf (vect_dump, "reduction: no def_stmt.");
1837169689Skan      return NULL_TREE;
1838169689Skan    }
1839169689Skan
1840169689Skan  if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1841169689Skan    {
1842169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1843169689Skan        {
1844169689Skan          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1845169689Skan        }
1846169689Skan      return NULL_TREE;
1847169689Skan    }
1848169689Skan
1849169689Skan  operation = TREE_OPERAND (def_stmt, 1);
1850169689Skan  code = TREE_CODE (operation);
1851169689Skan  if (!commutative_tree_code (code) || !associative_tree_code (code))
1852169689Skan    {
1853169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1854169689Skan        {
1855169689Skan          fprintf (vect_dump, "reduction: not commutative/associative: ");
1856169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1857169689Skan        }
1858169689Skan      return NULL_TREE;
1859169689Skan    }
1860169689Skan
1861169689Skan  op_type = TREE_CODE_LENGTH (code);
1862169689Skan  if (op_type != binary_op)
1863169689Skan    {
1864169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1865169689Skan        {
1866169689Skan          fprintf (vect_dump, "reduction: not binary operation: ");
1867169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1868169689Skan        }
1869169689Skan      return NULL_TREE;
1870169689Skan    }
1871169689Skan
1872169689Skan  op1 = TREE_OPERAND (operation, 0);
1873169689Skan  op2 = TREE_OPERAND (operation, 1);
1874169689Skan  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1875169689Skan    {
1876169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1877169689Skan        {
1878169689Skan          fprintf (vect_dump, "reduction: uses not ssa_names: ");
1879169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1880169689Skan        }
1881169689Skan      return NULL_TREE;
1882169689Skan    }
1883169689Skan
1884169689Skan  /* Check that it's ok to change the order of the computation.  */
1885169689Skan  type = TREE_TYPE (operation);
1886169689Skan  if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1887169689Skan      || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1888169689Skan    {
1889169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1890169689Skan        {
1891169689Skan          fprintf (vect_dump, "reduction: multiple types: operation type: ");
1892169689Skan          print_generic_expr (vect_dump, type, TDF_SLIM);
1893169689Skan          fprintf (vect_dump, ", operands types: ");
1894169689Skan          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1895169689Skan          fprintf (vect_dump, ",");
1896169689Skan          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1897169689Skan        }
1898169689Skan      return NULL_TREE;
1899169689Skan    }
1900169689Skan
1901169689Skan  /* CHECKME: check for !flag_finite_math_only too?  */
1902169689Skan  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1903169689Skan    {
1904169689Skan      /* Changing the order of operations changes the semantics.  */
1905169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1906169689Skan        {
1907169689Skan          fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1908169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1909169689Skan        }
1910169689Skan      return NULL_TREE;
1911169689Skan    }
1912169689Skan  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
1913169689Skan    {
1914169689Skan      /* Changing the order of operations changes the semantics.  */
1915169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1916169689Skan        {
1917169689Skan          fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1918169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1919169689Skan        }
1920169689Skan      return NULL_TREE;
1921169689Skan    }
1922169689Skan
1923169689Skan  /* reduction is safe. we're dealing with one of the following:
1924169689Skan     1) integer arithmetic and no trapv
1925169689Skan     2) floating point arithmetic, and special flags permit this optimization.
1926169689Skan   */
1927169689Skan  def1 = SSA_NAME_DEF_STMT (op1);
1928169689Skan  def2 = SSA_NAME_DEF_STMT (op2);
1929169689Skan  if (!def1 || !def2)
1930169689Skan    {
1931169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1932169689Skan        {
1933169689Skan          fprintf (vect_dump, "reduction: no defs for operands: ");
1934169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1935169689Skan        }
1936169689Skan      return NULL_TREE;
1937169689Skan    }
1938169689Skan
1939169689Skan  if (TREE_CODE (def1) == MODIFY_EXPR
1940169689Skan      && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1941169689Skan      && def2 == phi)
1942169689Skan    {
1943169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1944169689Skan        {
1945169689Skan          fprintf (vect_dump, "detected reduction:");
1946169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1947169689Skan        }
1948169689Skan      return def_stmt;
1949169689Skan    }
1950169689Skan  else if (TREE_CODE (def2) == MODIFY_EXPR
1951169689Skan      && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1952169689Skan      && def1 == phi)
1953169689Skan    {
1954169689Skan      /* Swap operands (just for simplicity - so that the rest of the code
1955169689Skan	 can assume that the reduction variable is always the last (second)
1956169689Skan	 argument).  */
1957169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1958169689Skan        {
1959169689Skan          fprintf (vect_dump, "detected reduction: need to swap operands:");
1960169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1961169689Skan        }
1962169689Skan      swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1963169689Skan				    &TREE_OPERAND (operation, 1));
1964169689Skan      return def_stmt;
1965169689Skan    }
1966169689Skan  else
1967169689Skan    {
1968169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1969169689Skan        {
1970169689Skan          fprintf (vect_dump, "reduction: unknown pattern.");
1971169689Skan          print_generic_expr (vect_dump, operation, TDF_SLIM);
1972169689Skan        }
1973169689Skan      return NULL_TREE;
1974169689Skan    }
1975169689Skan}
1976169689Skan
1977169689Skan
1978169689Skan/* Function vect_is_simple_iv_evolution.
1979169689Skan
1980169689Skan   FORNOW: A simple evolution of an induction variables in the loop is
1981169689Skan   considered a polynomial evolution with constant step.  */
1982169689Skan
1983169689Skanbool
1984169689Skanvect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1985169689Skan			     tree * step)
1986169689Skan{
1987169689Skan  tree init_expr;
1988169689Skan  tree step_expr;
1989169689Skan
1990169689Skan  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1991169689Skan
1992169689Skan  /* When there is no evolution in this loop, the evolution function
1993169689Skan     is not "simple".  */
1994169689Skan  if (evolution_part == NULL_TREE)
1995169689Skan    return false;
1996169689Skan
1997169689Skan  /* When the evolution is a polynomial of degree >= 2
1998169689Skan     the evolution function is not "simple".  */
1999169689Skan  if (tree_is_chrec (evolution_part))
2000169689Skan    return false;
2001169689Skan
2002169689Skan  step_expr = evolution_part;
2003169689Skan  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2004169689Skan                                                           loop_nb));
2005169689Skan
2006169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2007169689Skan    {
2008169689Skan      fprintf (vect_dump, "step: ");
2009169689Skan      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2010169689Skan      fprintf (vect_dump, ",  init: ");
2011169689Skan      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2012169689Skan    }
2013169689Skan
2014169689Skan  *init = init_expr;
2015169689Skan  *step = step_expr;
2016169689Skan
2017169689Skan  if (TREE_CODE (step_expr) != INTEGER_CST)
2018169689Skan    {
2019169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2020169689Skan        fprintf (vect_dump, "step unknown.");
2021169689Skan      return false;
2022169689Skan    }
2023169689Skan
2024169689Skan  return true;
2025169689Skan}
2026169689Skan
2027169689Skan
2028169689Skan/* Function vectorize_loops.
2029169689Skan
2030169689Skan   Entry Point to loop vectorization phase.  */
2031169689Skan
2032169689Skanvoid
2033169689Skanvectorize_loops (struct loops *loops)
2034169689Skan{
2035169689Skan  unsigned int i;
2036169689Skan  unsigned int num_vectorized_loops = 0;
2037169689Skan
2038169689Skan  /* Fix the verbosity level if not defined explicitly by the user.  */
2039169689Skan  vect_set_dump_settings ();
2040169689Skan
2041169689Skan  /* Allocate the bitmap that records which virtual variables that
2042169689Skan     need to be renamed.  */
2043169689Skan  vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2044169689Skan
2045169689Skan  /*  ----------- Analyze loops. -----------  */
2046169689Skan
2047169689Skan  /* If some loop was duplicated, it gets bigger number
2048169689Skan     than all previously defined loops. This fact allows us to run
2049169689Skan     only over initial loops skipping newly generated ones.  */
2050169689Skan  vect_loops_num = loops->num;
2051169689Skan  for (i = 1; i < vect_loops_num; i++)
2052169689Skan    {
2053169689Skan      loop_vec_info loop_vinfo;
2054169689Skan      struct loop *loop = loops->parray[i];
2055169689Skan
2056169689Skan      if (!loop)
2057169689Skan        continue;
2058169689Skan
2059169689Skan      vect_loop_location = find_loop_location (loop);
2060169689Skan      loop_vinfo = vect_analyze_loop (loop);
2061169689Skan      loop->aux = loop_vinfo;
2062169689Skan
2063169689Skan      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2064169689Skan	continue;
2065169689Skan
2066169689Skan      vect_transform_loop (loop_vinfo, loops);
2067169689Skan      num_vectorized_loops++;
2068169689Skan    }
2069169689Skan  vect_loop_location = UNKNOWN_LOC;
2070169689Skan
2071169689Skan  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2072169689Skan    fprintf (vect_dump, "vectorized %u loops in function.\n",
2073169689Skan	     num_vectorized_loops);
2074169689Skan
2075169689Skan  /*  ----------- Finalize. -----------  */
2076169689Skan
2077169689Skan  BITMAP_FREE (vect_vnames_to_rename);
2078169689Skan
2079169689Skan  for (i = 1; i < vect_loops_num; i++)
2080169689Skan    {
2081169689Skan      struct loop *loop = loops->parray[i];
2082169689Skan      loop_vec_info loop_vinfo;
2083169689Skan
2084169689Skan      if (!loop)
2085169689Skan	continue;
2086169689Skan      loop_vinfo = loop->aux;
2087169689Skan      destroy_loop_vec_info (loop_vinfo);
2088169689Skan      loop->aux = NULL;
2089169689Skan    }
2090169689Skan}
2091