/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
#include "builtins.h"
#include "gimplify.h"
#include "insn-codes.h"
#include "optabs.h"

/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (they do some things that we do
    not, and vice versa, and in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    doing so would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean them up ATM.

    (A small worked example of steps 1-3 follows this comment.)

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way it is done:

    We could do this much more nicely in theory, but don't (for reasons
    explained after the description of the theoretically nice way :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are of the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this, if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    I.e., if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    So the merge worklist is now {mergetmp, c, mergetmp2}.

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    Because we have only one operation left, we can now just set the
    original statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or fewer ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of
    the same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
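
/* As a small worked illustration of steps 1-3 above (the SSA names below
   are made up purely for exposition, they do not come from any particular
   testcase), consider rewriting

     t1 = a - b
     t2 = t1 + c
     e  = t2 + b

   Step 1 turns the subtract into an addition of a negate:

     t0 = -b
     t1 = a + t0
     t2 = t1 + c
     e  = t2 + b

   Step 2 linearizes the tree rooted at e into the operand list
   { a, t0, c, b }, and step 3 notices that t0 is the negate of b and
   cancels the pair, replacing it with 0, which is in turn dropped as
   the additive identity.  Step 4 then rewrites the remaining list
   { a, c } back into binary statements in rank order.  */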
216
217
218/* Statistics */
219static struct
220{
221  int linearized;
222  int constants_eliminated;
223  int ops_eliminated;
224  int rewritten;
225  int pows_encountered;
226  int pows_created;
227} reassociate_stats;
228
229/* Operator, rank pair.  */
230typedef struct operand_entry
231{
232  unsigned int rank;
233  int id;
234  tree op;
235  unsigned int count;
236} *operand_entry_t;
237
238static alloc_pool operand_entry_pool;
239
240/* This is used to assign a unique ID to each struct operand_entry
241   so that qsort results are identical on different hosts.  */
242static int next_operand_entry_id;
243
244/* Starting rank number for a given basic block, so that we can rank
245   operations using unmovable instructions in that BB based on the bb
246   depth.  */
247static long *bb_rank;
248
249/* Operand->rank hashtable.  */
250static hash_map<tree, long> *operand_rank;
251
/* Vector of SSA_NAMEs for which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - the basic block is
   split right after that SSA_NAME's definition statement and before
   its only use, which must be a bit ior.  */
256static vec<tree> reassoc_branch_fixups;
257
258/* Forward decls.  */
259static long get_rank (tree);
260static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
261
262/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
263   possibly added by gsi_remove.  */
264
265bool
266reassoc_remove_stmt (gimple_stmt_iterator *gsi)
267{
268  gimple stmt = gsi_stmt (*gsi);
269
270  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
271    return gsi_remove (gsi, true);
272
273  gimple_stmt_iterator prev = *gsi;
274  gsi_prev (&prev);
275  unsigned uid = gimple_uid (stmt);
276  basic_block bb = gimple_bb (stmt);
277  bool ret = gsi_remove (gsi, true);
278  if (!gsi_end_p (prev))
279    gsi_next (&prev);
280  else
281    prev = gsi_start_bb (bb);
282  gimple end_stmt = gsi_stmt (*gsi);
283  while ((stmt = gsi_stmt (prev)) != end_stmt)
284    {
285      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
286      gimple_set_uid (stmt, uid);
287      gsi_next (&prev);
288    }
289  return ret;
290}
291
292/* Bias amount for loop-carried phis.  We want this to be larger than
293   the depth of any reassociation tree we can see, but not larger than
294   the rank difference between two blocks.  */
295#define PHI_LOOP_BIAS (1 << 15)
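
/* For example, if block ranks were assigned in steps of, say, 1 << 16
   (an assumption for illustration; the exact scheme lives in the pass
   set-up code, which is not shown here), a bias of 1 << 15 comfortably
   exceeds the depth of any reassociation tree we expect to see while
   still staying below the gap between two consecutive block ranks.  */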
296
297/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
298   an innermost loop, and the phi has only a single use which is inside
299   the loop, then the rank is the block rank of the loop latch plus an
300   extra bias for the loop-carried dependence.  This causes expressions
301   calculated into an accumulator variable to be independent for each
302   iteration of the loop.  If STMT is some other phi, the rank is the
303   block rank of its containing block.  */
304static long
305phi_rank (gimple stmt)
306{
307  basic_block bb = gimple_bb (stmt);
308  struct loop *father = bb->loop_father;
309  tree res;
310  unsigned i;
311  use_operand_p use;
312  gimple use_stmt;
313
314  /* We only care about real loops (those with a latch).  */
315  if (!father->latch)
316    return bb_rank[bb->index];
317
318  /* Interesting phis must be in headers of innermost loops.  */
319  if (bb != father->header
320      || father->inner)
321    return bb_rank[bb->index];
322
323  /* Ignore virtual SSA_NAMEs.  */
324  res = gimple_phi_result (stmt);
325  if (virtual_operand_p (res))
326    return bb_rank[bb->index];
327
328  /* The phi definition must have a single use, and that use must be
329     within the loop.  Otherwise this isn't an accumulator pattern.  */
330  if (!single_imm_use (res, &use, &use_stmt)
331      || gimple_bb (use_stmt)->loop_father != father)
332    return bb_rank[bb->index];
333
334  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
335  for (i = 0; i < gimple_phi_num_args (stmt); i++)
336    {
337      tree arg = gimple_phi_arg_def (stmt, i);
338      if (TREE_CODE (arg) == SSA_NAME
339	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
340	{
341	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
342	  if (gimple_bb (def_stmt)->loop_father == father)
343	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
344	}
345    }
346
347  /* Must be an uninteresting phi.  */
348  return bb_rank[bb->index];
349}
350
351/* If EXP is an SSA_NAME defined by a PHI statement that represents a
352   loop-carried dependence of an innermost loop, return TRUE; else
353   return FALSE.  */
354static bool
355loop_carried_phi (tree exp)
356{
357  gimple phi_stmt;
358  long block_rank;
359
360  if (TREE_CODE (exp) != SSA_NAME
361      || SSA_NAME_IS_DEFAULT_DEF (exp))
362    return false;
363
364  phi_stmt = SSA_NAME_DEF_STMT (exp);
365
366  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
367    return false;
368
369  /* Non-loop-carried phis have block rank.  Loop-carried phis have
370     an additional bias added in.  If this phi doesn't have block rank,
371     it's biased and should not be propagated.  */
372  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
373
374  if (phi_rank (phi_stmt) != block_rank)
375    return true;
376
377  return false;
378}
379
380/* Return the maximum of RANK and the rank that should be propagated
381   from expression OP.  For most operands, this is just the rank of OP.
382   For loop-carried phis, the value is zero to avoid undoing the bias
383   in favor of the phi.  */
384static long
385propagate_rank (long rank, tree op)
386{
387  long op_rank;
388
389  if (loop_carried_phi (op))
390    return rank;
391
392  op_rank = get_rank (op);
393
394  return MAX (rank, op_rank);
395}
396
397/* Look up the operand rank structure for expression E.  */
398
399static inline long
400find_operand_rank (tree e)
401{
402  long *slot = operand_rank->get (e);
403  return slot ? *slot : -1;
404}
405
406/* Insert {E,RANK} into the operand rank hashtable.  */
407
408static inline void
409insert_operand_rank (tree e, long rank)
410{
411  gcc_assert (rank > 0);
412  gcc_assert (!operand_rank->put (e, rank));
413}
414
415/* Given an expression E, return the rank of the expression.  */
416
417static long
418get_rank (tree e)
419{
420  /* Constants have rank 0.  */
421  if (is_gimple_min_invariant (e))
422    return 0;
423
  /* SSA_NAMEs have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal; however, it gives good
     results.  */
434
435  /* We make an exception to the normal ranking system to break
436     dependences of accumulator variables in loops.  Suppose we
437     have a simple one-block loop containing:
438
439       x_1 = phi(x_0, x_2)
440       b = a + x_1
441       c = b + d
442       x_2 = c + e
443
444     As shown, each iteration of the calculation into x is fully
445     dependent upon the iteration before it.  We would prefer to
446     see this in the form:
447
448       x_1 = phi(x_0, x_2)
449       b = a + d
450       c = b + e
451       x_2 = c + x_1
452
453     If the loop is unrolled, the calculations of b and c from
454     different iterations can be interleaved.
455
456     To obtain this result during reassociation, we bias the rank
457     of the phi definition x_1 upward, when it is recognized as an
458     accumulator pattern.  The artificial rank causes it to be
459     added last, providing the desired independence.  */
460
461  if (TREE_CODE (e) == SSA_NAME)
462    {
463      gimple stmt;
464      long rank;
465      int i, n;
466      tree op;
467
468      if (SSA_NAME_IS_DEFAULT_DEF (e))
469	return find_operand_rank (e);
470
471      stmt = SSA_NAME_DEF_STMT (e);
472      if (gimple_code (stmt) == GIMPLE_PHI)
473	return phi_rank (stmt);
474
475      if (!is_gimple_assign (stmt)
476	  || gimple_vdef (stmt))
477	return bb_rank[gimple_bb (stmt)->index];
478
479      /* If we already have a rank for this expression, use that.  */
480      rank = find_operand_rank (e);
481      if (rank != -1)
482	return rank;
483
484      /* Otherwise, find the maximum rank for the operands.  As an
485	 exception, remove the bias from loop-carried phis when propagating
486	 the rank so that dependent operations are not also biased.  */
487      rank = 0;
488      if (gimple_assign_single_p (stmt))
489	{
490	  tree rhs = gimple_assign_rhs1 (stmt);
491	  n = TREE_OPERAND_LENGTH (rhs);
492	  if (n == 0)
493	    rank = propagate_rank (rank, rhs);
494	  else
495	    {
496	      for (i = 0; i < n; i++)
497		{
498		  op = TREE_OPERAND (rhs, i);
499
500		  if (op != NULL_TREE)
501		    rank = propagate_rank (rank, op);
502		}
503	    }
504	}
505      else
506	{
507	  n = gimple_num_ops (stmt);
508	  for (i = 1; i < n; i++)
509	    {
510	      op = gimple_op (stmt, i);
511	      gcc_assert (op);
512	      rank = propagate_rank (rank, op);
513	    }
514	}
515
516      if (dump_file && (dump_flags & TDF_DETAILS))
517	{
518	  fprintf (dump_file, "Rank for ");
519	  print_generic_expr (dump_file, e, 0);
520	  fprintf (dump_file, " is %ld\n", (rank + 1));
521	}
522
523      /* Note the rank in the hashtable so we don't recompute it.  */
524      insert_operand_rank (e, (rank + 1));
525      return (rank + 1);
526    }
527
  /* Globals, etc., are rank 0.  */
529  return 0;
530}
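
/* For example, given a statement t = a + b where a has rank 3 and b has
   rank 5, get_rank computes 5 as the maximum operand rank and records
   rank 6 for t, i.e. one more than the highest-ranked operand.  */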
531
532
533/* We want integer ones to end up last no matter what, since they are
534   the ones we can do the most with.  */
535#define INTEGER_CONST_TYPE 1 << 3
536#define FLOAT_CONST_TYPE 1 << 2
537#define OTHER_CONST_TYPE 1 << 1
538
539/* Classify an invariant tree into integer, float, or other, so that
540   we can sort them to be near other constants of the same type.  */
541static inline int
542constant_type (tree t)
543{
544  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
545    return INTEGER_CONST_TYPE;
546  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
547    return FLOAT_CONST_TYPE;
548  else
549    return OTHER_CONST_TYPE;
550}
551
/* qsort comparison function to sort operand entries PA and PB such that
   the sorted array is ordered by rank in decreasing order.  */
554static int
555sort_by_operand_rank (const void *pa, const void *pb)
556{
557  const operand_entry_t oea = *(const operand_entry_t *)pa;
558  const operand_entry_t oeb = *(const operand_entry_t *)pb;
559
  /* It's nicer for optimize_ops_list if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
563  if (oeb->rank == 0 && oea->rank == 0)
564    {
565      if (constant_type (oeb->op) != constant_type (oea->op))
566	return constant_type (oeb->op) - constant_type (oea->op);
567      else
568	/* To make sorting result stable, we use unique IDs to determine
569	   order.  */
570        return oeb->id - oea->id;
571    }
572
573  /* Lastly, make sure the versions that are the same go next to each
574     other.  */
575  if ((oeb->rank - oea->rank == 0)
576      && TREE_CODE (oea->op) == SSA_NAME
577      && TREE_CODE (oeb->op) == SSA_NAME)
578    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
583      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
584	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
585	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
586	{
587	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
588	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
589	  basic_block bba = gimple_bb (stmta);
590	  basic_block bbb = gimple_bb (stmtb);
591	  if (bbb != bba)
592	    {
593	      if (bb_rank[bbb->index] != bb_rank[bba->index])
594		return bb_rank[bbb->index] - bb_rank[bba->index];
595	    }
596	  else
597	    {
598	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600	      if (da != db)
601		return da ? 1 : -1;
602	    }
603	}
604
605      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
606	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
607      else
608	return oeb->id - oea->id;
609    }
610
611  if (oeb->rank != oea->rank)
612    return oeb->rank - oea->rank;
613  else
614    return oeb->id - oea->id;
615}
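
/* For instance, an operand list such as { c_7 (rank 3), 5, x_2 (rank 8) }
   sorts to { x_2, c_7, 5 }: operands appear in order of decreasing rank,
   with the constant at the very end, which is where optimize_ops_list and
   eliminate_using_constants expect to find it.  */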
616
617/* Add an operand entry to *OPS for the tree operand OP.  */
618
619static void
620add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
621{
622  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
623
624  oe->op = op;
625  oe->rank = get_rank (op);
626  oe->id = next_operand_entry_id++;
627  oe->count = 1;
628  ops->safe_push (oe);
629}
630
631/* Add an operand entry to *OPS for the tree operand OP with repeat
632   count REPEAT.  */
633
634static void
635add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
636		       HOST_WIDE_INT repeat)
637{
638  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
639
640  oe->op = op;
641  oe->rank = get_rank (op);
642  oe->id = next_operand_entry_id++;
643  oe->count = repeat;
644  ops->safe_push (oe);
645
646  reassociate_stats.pows_encountered++;
647}
648
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */
651
652static bool
653is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
654{
655  basic_block bb = gimple_bb (stmt);
656
657  if (gimple_bb (stmt) == NULL)
658    return false;
659
660  if (!flow_bb_inside_loop_p (loop, bb))
661    return false;
662
663  if (is_gimple_assign (stmt)
664      && gimple_assign_rhs_code (stmt) == code
665      && has_single_use (gimple_assign_lhs (stmt)))
666    return true;
667
668  return false;
669}
670
671
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL_TREE.  */
674
675static tree
676get_unary_op (tree name, enum tree_code opcode)
677{
678  gimple stmt = SSA_NAME_DEF_STMT (name);
679
680  if (!is_gimple_assign (stmt))
681    return NULL_TREE;
682
683  if (gimple_assign_rhs_code (stmt) == opcode)
684    return gimple_assign_rhs1 (stmt);
685  return NULL_TREE;
686}
687
688/* If CURR and LAST are a pair of ops that OPCODE allows us to
689   eliminate through equivalences, do so, remove them from OPS, and
690   return true.  Otherwise, return false.  */
691
692static bool
693eliminate_duplicate_pair (enum tree_code opcode,
694			  vec<operand_entry_t> *ops,
695			  bool *all_done,
696			  unsigned int i,
697			  operand_entry_t curr,
698			  operand_entry_t last)
699{
700
701  /* If we have two of the same op, and the opcode is & |, min, or max,
702     we can eliminate one of them.
703     If we have two of the same op, and the opcode is ^, we can
704     eliminate both of them.  */
705
706  if (last && last->op == curr->op)
707    {
708      switch (opcode)
709	{
710	case MAX_EXPR:
711	case MIN_EXPR:
712	case BIT_IOR_EXPR:
713	case BIT_AND_EXPR:
714	  if (dump_file && (dump_flags & TDF_DETAILS))
715	    {
716	      fprintf (dump_file, "Equivalence: ");
717	      print_generic_expr (dump_file, curr->op, 0);
718	      fprintf (dump_file, " [&|minmax] ");
719	      print_generic_expr (dump_file, last->op, 0);
720	      fprintf (dump_file, " -> ");
721	      print_generic_stmt (dump_file, last->op, 0);
722	    }
723
724	  ops->ordered_remove (i);
725	  reassociate_stats.ops_eliminated ++;
726
727	  return true;
728
729	case BIT_XOR_EXPR:
730	  if (dump_file && (dump_flags & TDF_DETAILS))
731	    {
732	      fprintf (dump_file, "Equivalence: ");
733	      print_generic_expr (dump_file, curr->op, 0);
734	      fprintf (dump_file, " ^ ");
735	      print_generic_expr (dump_file, last->op, 0);
736	      fprintf (dump_file, " -> nothing\n");
737	    }
738
739	  reassociate_stats.ops_eliminated += 2;
740
741	  if (ops->length () == 2)
742	    {
743	      ops->create (0);
744	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
745	      *all_done = true;
746	    }
747	  else
748	    {
749	      ops->ordered_remove (i-1);
750	      ops->ordered_remove (i-1);
751	    }
752
753	  return true;
754
755	default:
756	  break;
757	}
758    }
759  return false;
760}
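
/* For example, in a BIT_AND_EXPR operand list the adjacent pair
   x_1 & x_1 collapses to a single x_1, whereas in a BIT_XOR_EXPR list
   x_1 ^ x_1 cancels to 0; if those were the only two operands, the
   whole list is replaced by a zero constant and *ALL_DONE is set.  */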
761
762static vec<tree> plus_negates;
763
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other operand from OPS,
   replace OPS[CURRINDEX] with 0 or -1, respectively, and return true.
   Otherwise, return false.  */
769
770static bool
771eliminate_plus_minus_pair (enum tree_code opcode,
772			   vec<operand_entry_t> *ops,
773			   unsigned int currindex,
774			   operand_entry_t curr)
775{
776  tree negateop;
777  tree notop;
778  unsigned int i;
779  operand_entry_t oe;
780
781  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
782    return false;
783
784  negateop = get_unary_op (curr->op, NEGATE_EXPR);
785  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
786  if (negateop == NULL_TREE && notop == NULL_TREE)
787    return false;
788
789  /* Any non-negated version will have a rank that is one less than
790     the current rank.  So once we hit those ranks, if we don't find
791     one, we can stop.  */
792
793  for (i = currindex + 1;
794       ops->iterate (i, &oe)
795       && oe->rank >= curr->rank - 1 ;
796       i++)
797    {
798      if (oe->op == negateop)
799	{
800
801	  if (dump_file && (dump_flags & TDF_DETAILS))
802	    {
803	      fprintf (dump_file, "Equivalence: ");
804	      print_generic_expr (dump_file, negateop, 0);
805	      fprintf (dump_file, " + -");
806	      print_generic_expr (dump_file, oe->op, 0);
807	      fprintf (dump_file, " -> 0\n");
808	    }
809
810	  ops->ordered_remove (i);
811	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
812	  ops->ordered_remove (currindex);
813	  reassociate_stats.ops_eliminated ++;
814
815	  return true;
816	}
817      else if (oe->op == notop)
818	{
819	  tree op_type = TREE_TYPE (oe->op);
820
821	  if (dump_file && (dump_flags & TDF_DETAILS))
822	    {
823	      fprintf (dump_file, "Equivalence: ");
824	      print_generic_expr (dump_file, notop, 0);
825	      fprintf (dump_file, " + ~");
826	      print_generic_expr (dump_file, oe->op, 0);
827	      fprintf (dump_file, " -> -1\n");
828	    }
829
830	  ops->ordered_remove (i);
831	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
832	  ops->ordered_remove (currindex);
833	  reassociate_stats.ops_eliminated ++;
834
835	  return true;
836	}
837    }
838
839  /* CURR->OP is a negate expr in a plus expr: save it for later
840     inspection in repropagate_negates().  */
841  if (negateop != NULL_TREE)
842    plus_negates.safe_push (curr->op);
843
844  return false;
845}
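
/* For example, given a PLUS_EXPR operand list { a_5, t_9, b_3 } where
   t_9 = -b_3, the pair t_9 + b_3 is replaced by 0.  Likewise, if
   t_9 = ~b_3, the pair t_9 + b_3 is replaced by -1, since in two's
   complement arithmetic ~x is -x - 1.  */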
846
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, the whole operand list collapses to
   0 (for x & ~x) or -1 (for x | ~x), and we return true.  Otherwise,
   return false.  */
852
853static bool
854eliminate_not_pairs (enum tree_code opcode,
855		     vec<operand_entry_t> *ops,
856		     unsigned int currindex,
857		     operand_entry_t curr)
858{
859  tree notop;
860  unsigned int i;
861  operand_entry_t oe;
862
863  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864      || TREE_CODE (curr->op) != SSA_NAME)
865    return false;
866
867  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868  if (notop == NULL_TREE)
869    return false;
870
871  /* Any non-not version will have a rank that is one less than
872     the current rank.  So once we hit those ranks, if we don't find
873     one, we can stop.  */
874
875  for (i = currindex + 1;
876       ops->iterate (i, &oe)
877       && oe->rank >= curr->rank - 1;
878       i++)
879    {
880      if (oe->op == notop)
881	{
882	  if (dump_file && (dump_flags & TDF_DETAILS))
883	    {
884	      fprintf (dump_file, "Equivalence: ");
885	      print_generic_expr (dump_file, notop, 0);
886	      if (opcode == BIT_AND_EXPR)
887		fprintf (dump_file, " & ~");
888	      else if (opcode == BIT_IOR_EXPR)
889		fprintf (dump_file, " | ~");
890	      print_generic_expr (dump_file, oe->op, 0);
891	      if (opcode == BIT_AND_EXPR)
892		fprintf (dump_file, " -> 0\n");
893	      else if (opcode == BIT_IOR_EXPR)
894		fprintf (dump_file, " -> -1\n");
895	    }
896
897	  if (opcode == BIT_AND_EXPR)
898	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
899	  else if (opcode == BIT_IOR_EXPR)
900	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901
902	  reassociate_stats.ops_eliminated += ops->length () - 1;
903	  ops->truncate (0);
904	  ops->quick_push (oe);
905	  return true;
906	}
907    }
908
909  return false;
910}
911
912/* Use constant value that may be present in OPS to try to eliminate
913   operands.  Note that this function is only really used when we've
914   eliminated ops for other reasons, or merged constants.  Across
915   single statements, fold already does all of this, plus more.  There
916   is little point in duplicating logic, so I've only included the
917   identities that I could ever construct testcases to trigger.  */
918
919static void
920eliminate_using_constants (enum tree_code opcode,
921			   vec<operand_entry_t> *ops)
922{
923  operand_entry_t oelast = ops->last ();
924  tree type = TREE_TYPE (oelast->op);
925
926  if (oelast->rank == 0
927      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928    {
929      switch (opcode)
930	{
931	case BIT_AND_EXPR:
932	  if (integer_zerop (oelast->op))
933	    {
934	      if (ops->length () != 1)
935		{
936		  if (dump_file && (dump_flags & TDF_DETAILS))
937		    fprintf (dump_file, "Found & 0, removing all other ops\n");
938
939		  reassociate_stats.ops_eliminated += ops->length () - 1;
940
941		  ops->truncate (0);
942		  ops->quick_push (oelast);
943		  return;
944		}
945	    }
946	  else if (integer_all_onesp (oelast->op))
947	    {
948	      if (ops->length () != 1)
949		{
950		  if (dump_file && (dump_flags & TDF_DETAILS))
951		    fprintf (dump_file, "Found & -1, removing\n");
952		  ops->pop ();
953		  reassociate_stats.ops_eliminated++;
954		}
955	    }
956	  break;
957	case BIT_IOR_EXPR:
958	  if (integer_all_onesp (oelast->op))
959	    {
960	      if (ops->length () != 1)
961		{
962		  if (dump_file && (dump_flags & TDF_DETAILS))
963		    fprintf (dump_file, "Found | -1, removing all other ops\n");
964
965		  reassociate_stats.ops_eliminated += ops->length () - 1;
966
967		  ops->truncate (0);
968		  ops->quick_push (oelast);
969		  return;
970		}
971	    }
972	  else if (integer_zerop (oelast->op))
973	    {
974	      if (ops->length () != 1)
975		{
976		  if (dump_file && (dump_flags & TDF_DETAILS))
977		    fprintf (dump_file, "Found | 0, removing\n");
978		  ops->pop ();
979		  reassociate_stats.ops_eliminated++;
980		}
981	    }
982	  break;
983	case MULT_EXPR:
984	  if (integer_zerop (oelast->op)
985	      || (FLOAT_TYPE_P (type)
986		  && !HONOR_NANS (type)
987		  && !HONOR_SIGNED_ZEROS (type)
988		  && real_zerop (oelast->op)))
989	    {
990	      if (ops->length () != 1)
991		{
992		  if (dump_file && (dump_flags & TDF_DETAILS))
993		    fprintf (dump_file, "Found * 0, removing all other ops\n");
994
995		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
997		  ops->quick_push (oelast);
998		  return;
999		}
1000	    }
1001	  else if (integer_onep (oelast->op)
1002		   || (FLOAT_TYPE_P (type)
1003		       && !HONOR_SNANS (type)
1004		       && real_onep (oelast->op)))
1005	    {
1006	      if (ops->length () != 1)
1007		{
1008		  if (dump_file && (dump_flags & TDF_DETAILS))
1009		    fprintf (dump_file, "Found * 1, removing\n");
1010		  ops->pop ();
1011		  reassociate_stats.ops_eliminated++;
1012		  return;
1013		}
1014	    }
1015	  break;
1016	case BIT_XOR_EXPR:
1017	case PLUS_EXPR:
1018	case MINUS_EXPR:
1019	  if (integer_zerop (oelast->op)
1020	      || (FLOAT_TYPE_P (type)
1021		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022		  && fold_real_zero_addition_p (type, oelast->op,
1023						opcode == MINUS_EXPR)))
1024	    {
1025	      if (ops->length () != 1)
1026		{
1027		  if (dump_file && (dump_flags & TDF_DETAILS))
1028		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1029		  ops->pop ();
1030		  reassociate_stats.ops_eliminated++;
1031		  return;
1032		}
1033	    }
1034	  break;
1035	default:
1036	  break;
1037	}
1038    }
1039}
1040
1041
1042static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1043				 bool, bool);
1044
1045/* Structure for tracking and counting operands.  */
1046typedef struct oecount_s {
1047  int cnt;
1048  int id;
1049  enum tree_code oecode;
1050  tree op;
1051} oecount;
1052
1053
1054/* The heap for the oecount hashtable and the sorted list of operands.  */
1055static vec<oecount> cvec;
1056
1057
1058/* Oecount hashtable helpers.  */
1059
1060struct oecount_hasher
1061{
1062  typedef int value_type;
1063  typedef int compare_type;
1064  typedef int store_values_directly;
1065  static inline hashval_t hash (const value_type &);
1066  static inline bool equal (const value_type &, const compare_type &);
1067  static bool is_deleted (int &v) { return v == 1; }
1068  static void mark_deleted (int &e) { e = 1; }
1069  static bool is_empty (int &v) { return v == 0; }
1070  static void mark_empty (int &e) { e = 0; }
1071  static void remove (int &) {}
1072};
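
/* The "hash table entries" above are simply indices into CVEC biased
   by 42, so that the values 0 and 1 stay free to serve as the empty
   and deleted markers checked by is_empty and is_deleted.  */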
1073
1074/* Hash function for oecount.  */
1075
1076inline hashval_t
1077oecount_hasher::hash (const value_type &p)
1078{
1079  const oecount *c = &cvec[p - 42];
1080  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1081}
1082
1083/* Comparison function for oecount.  */
1084
1085inline bool
1086oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1087{
1088  const oecount *c1 = &cvec[p1 - 42];
1089  const oecount *c2 = &cvec[p2 - 42];
1090  return (c1->oecode == c2->oecode
1091	  && c1->op == c2->op);
1092}
1093
1094/* Comparison function for qsort sorting oecount elements by count.  */
1095
1096static int
1097oecount_cmp (const void *p1, const void *p2)
1098{
1099  const oecount *c1 = (const oecount *)p1;
1100  const oecount *c2 = (const oecount *)p2;
1101  if (c1->cnt != c2->cnt)
1102    return c1->cnt - c2->cnt;
1103  else
1104    /* If counts are identical, use unique IDs to stabilize qsort.  */
1105    return c1->id - c2->id;
1106}
1107
1108/* Return TRUE iff STMT represents a builtin call that raises OP
1109   to some exponent.  */
1110
1111static bool
1112stmt_is_power_of_op (gimple stmt, tree op)
1113{
1114  tree fndecl;
1115
1116  if (!is_gimple_call (stmt))
1117    return false;
1118
1119  fndecl = gimple_call_fndecl (stmt);
1120
1121  if (!fndecl
1122      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1123    return false;
1124
1125  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1126    {
1127    CASE_FLT_FN (BUILT_IN_POW):
1128    CASE_FLT_FN (BUILT_IN_POWI):
1129      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1130
1131    default:
1132      return false;
1133    }
1134}
1135
1136/* Given STMT which is a __builtin_pow* call, decrement its exponent
1137   in place and return the result.  Assumes that stmt_is_power_of_op
1138   was previously called for STMT and returned TRUE.  */
1139
1140static HOST_WIDE_INT
1141decrement_power (gimple stmt)
1142{
1143  REAL_VALUE_TYPE c, cint;
1144  HOST_WIDE_INT power;
1145  tree arg1;
1146
1147  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1148    {
1149    CASE_FLT_FN (BUILT_IN_POW):
1150      arg1 = gimple_call_arg (stmt, 1);
1151      c = TREE_REAL_CST (arg1);
1152      power = real_to_integer (&c) - 1;
1153      real_from_integer (&cint, VOIDmode, power, SIGNED);
1154      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1155      return power;
1156
1157    CASE_FLT_FN (BUILT_IN_POWI):
1158      arg1 = gimple_call_arg (stmt, 1);
1159      power = TREE_INT_CST_LOW (arg1) - 1;
1160      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1161      return power;
1162
1163    default:
1164      gcc_unreachable ();
1165    }
1166}
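
/* For example, for a call __builtin_pow (x, 3.0) the function above
   rewrites the call to __builtin_pow (x, 2.0) in place and returns 2;
   the caller treats a return value of 1 as "the call now computes x
   itself" and propagates x directly.  */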
1167
1168/* Find the single immediate use of STMT's LHS, and replace it
1169   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1170   replace *DEF with OP as well.  */
1171
1172static void
1173propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1174{
1175  tree lhs;
1176  gimple use_stmt;
1177  use_operand_p use;
1178  gimple_stmt_iterator gsi;
1179
1180  if (is_gimple_call (stmt))
1181    lhs = gimple_call_lhs (stmt);
1182  else
1183    lhs = gimple_assign_lhs (stmt);
1184
1185  gcc_assert (has_single_use (lhs));
1186  single_imm_use (lhs, &use, &use_stmt);
1187  if (lhs == *def)
1188    *def = op;
1189  SET_USE (use, op);
1190  if (TREE_CODE (op) != SSA_NAME)
1191    update_stmt (use_stmt);
1192  gsi = gsi_for_stmt (stmt);
1193  unlink_stmt_vdef (stmt);
1194  reassoc_remove_stmt (&gsi);
1195  release_defs (stmt);
1196}
1197
1198/* Walks the linear chain with result *DEF searching for an operation
1199   with operand OP and code OPCODE removing that from the chain.  *DEF
1200   is updated if there is only one operand but no operation left.  */
1201
1202static void
1203zero_one_operation (tree *def, enum tree_code opcode, tree op)
1204{
1205  gimple stmt = SSA_NAME_DEF_STMT (*def);
1206
1207  do
1208    {
1209      tree name;
1210
1211      if (opcode == MULT_EXPR
1212	  && stmt_is_power_of_op (stmt, op))
1213	{
1214	  if (decrement_power (stmt) == 1)
1215	    propagate_op_to_single_use (op, stmt, def);
1216	  return;
1217	}
1218
1219      name = gimple_assign_rhs1 (stmt);
1220
1221      /* If this is the operation we look for and one of the operands
1222         is ours simply propagate the other operand into the stmts
1223	 single use.  */
1224      if (gimple_assign_rhs_code (stmt) == opcode
1225	  && (name == op
1226	      || gimple_assign_rhs2 (stmt) == op))
1227	{
1228	  if (name == op)
1229	    name = gimple_assign_rhs2 (stmt);
1230	  propagate_op_to_single_use (name, stmt, def);
1231	  return;
1232	}
1233
1234      /* We might have a multiply of two __builtin_pow* calls, and
1235	 the operand might be hiding in the rightmost one.  */
1236      if (opcode == MULT_EXPR
1237	  && gimple_assign_rhs_code (stmt) == opcode
1238	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1239	  && has_single_use (gimple_assign_rhs2 (stmt)))
1240	{
1241	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1242	  if (stmt_is_power_of_op (stmt2, op))
1243	    {
1244	      if (decrement_power (stmt2) == 1)
1245		propagate_op_to_single_use (op, stmt2, def);
1246	      return;
1247	    }
1248	}
1249
1250      /* Continue walking the chain.  */
1251      gcc_assert (name != op
1252		  && TREE_CODE (name) == SSA_NAME);
1253      stmt = SSA_NAME_DEF_STMT (name);
1254    }
1255  while (1);
1256}
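
/* For example, with the multiplication chain
     t_2 = a_1 * b_4;
     t_3 = t_2 * c_6;
   and *DEF == t_3, calling zero_one_operation (&def, MULT_EXPR, b_4)
   removes the first statement and redirects its single use so the chain
   becomes t_3 = a_1 * c_6, i.e. exactly one factor of b_4 is dropped.  */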
1257
1258/* Returns true if statement S1 dominates statement S2.  Like
1259   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1260
1261static bool
1262reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1263{
1264  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1265
1266  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1267     SSA_NAME.  Assume it lives at the beginning of function and
1268     thus dominates everything.  */
1269  if (!bb1 || s1 == s2)
1270    return true;
1271
1272  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1273  if (!bb2)
1274    return false;
1275
1276  if (bb1 == bb2)
1277    {
1278      /* PHIs in the same basic block are assumed to be
1279	 executed all in parallel, if only one stmt is a PHI,
1280	 it dominates the other stmt in the same basic block.  */
1281      if (gimple_code (s1) == GIMPLE_PHI)
1282	return true;
1283
1284      if (gimple_code (s2) == GIMPLE_PHI)
1285	return false;
1286
1287      gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1288
1289      if (gimple_uid (s1) < gimple_uid (s2))
1290	return true;
1291
1292      if (gimple_uid (s1) > gimple_uid (s2))
1293	return false;
1294
1295      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1296      unsigned int uid = gimple_uid (s1);
1297      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1298	{
1299	  gimple s = gsi_stmt (gsi);
1300	  if (gimple_uid (s) != uid)
1301	    break;
1302	  if (s == s2)
1303	    return true;
1304	}
1305
1306      return false;
1307    }
1308
1309  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1310}
1311
1312/* Insert STMT after INSERT_POINT.  */
1313
1314static void
1315insert_stmt_after (gimple stmt, gimple insert_point)
1316{
1317  gimple_stmt_iterator gsi;
1318  basic_block bb;
1319
1320  if (gimple_code (insert_point) == GIMPLE_PHI)
1321    bb = gimple_bb (insert_point);
1322  else if (!stmt_ends_bb_p (insert_point))
1323    {
1324      gsi = gsi_for_stmt (insert_point);
1325      gimple_set_uid (stmt, gimple_uid (insert_point));
1326      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1327      return;
1328    }
1329  else
1330    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1331       thus if it must end a basic block, it should be a call that can
1332       throw, or some assignment that can throw.  If it throws, the LHS
1333       of it will not be initialized though, so only valid places using
1334       the SSA_NAME should be dominated by the fallthru edge.  */
1335    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1336  gsi = gsi_after_labels (bb);
1337  if (gsi_end_p (gsi))
1338    {
1339      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1340      gimple_set_uid (stmt,
1341		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342    }
1343  else
1344    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1345  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1346}
1347
1348/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1349   the result.  Places the statement after the definition of either
1350   OP1 or OP2.  Returns the new statement.  */
1351
1352static gimple
1353build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1354{
1355  gimple op1def = NULL, op2def = NULL;
1356  gimple_stmt_iterator gsi;
1357  tree op;
1358  gassign *sum;
1359
1360  /* Create the addition statement.  */
1361  op = make_ssa_name (type);
1362  sum = gimple_build_assign (op, opcode, op1, op2);
1363
1364  /* Find an insertion place and insert.  */
1365  if (TREE_CODE (op1) == SSA_NAME)
1366    op1def = SSA_NAME_DEF_STMT (op1);
1367  if (TREE_CODE (op2) == SSA_NAME)
1368    op2def = SSA_NAME_DEF_STMT (op2);
1369  if ((!op1def || gimple_nop_p (op1def))
1370      && (!op2def || gimple_nop_p (op2def)))
1371    {
1372      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1373      if (gsi_end_p (gsi))
1374	{
1375	  gimple_stmt_iterator gsi2
1376	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1377	  gimple_set_uid (sum,
1378			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1379	}
1380      else
1381	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1382      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1383    }
1384  else
1385    {
1386      gimple insert_point;
1387      if ((!op1def || gimple_nop_p (op1def))
1388	   || (op2def && !gimple_nop_p (op2def)
1389	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1390	insert_point = op2def;
1391      else
1392	insert_point = op1def;
1393      insert_stmt_after (sum, insert_point);
1394    }
1395  update_stmt (sum);
1396
1397  return sum;
1398}
1399
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of
      (operand, code) pairs in all of the candidates' chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b) * x * y in one invocation of the reassociation pass.  */
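
/* As a concrete (made-up) example, for the addition chain
     a * x + b * x + c
   the pair (x, MULT_EXPR) occurs in two candidate chains.  We strip one
   multiplication by x from each candidate, sum the stripped candidates
   into a new statement t = a + b, replace the first candidate by t * x,
   and turn the second candidate into the constant 0.  The ops list is
   then { t * x, 0, c }, which is optimized further by the caller.  */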
1434
1435static bool
1436undistribute_ops_list (enum tree_code opcode,
1437		       vec<operand_entry_t> *ops, struct loop *loop)
1438{
1439  unsigned int length = ops->length ();
1440  operand_entry_t oe1;
1441  unsigned i, j;
1442  sbitmap candidates, candidates2;
1443  unsigned nr_candidates, nr_candidates2;
1444  sbitmap_iterator sbi0;
1445  vec<operand_entry_t> *subops;
1446  bool changed = false;
1447  int next_oecount_id = 0;
1448
1449  if (length <= 1
1450      || opcode != PLUS_EXPR)
1451    return false;
1452
1453  /* Build a list of candidates to process.  */
1454  candidates = sbitmap_alloc (length);
1455  bitmap_clear (candidates);
1456  nr_candidates = 0;
1457  FOR_EACH_VEC_ELT (*ops, i, oe1)
1458    {
1459      enum tree_code dcode;
1460      gimple oe1def;
1461
1462      if (TREE_CODE (oe1->op) != SSA_NAME)
1463	continue;
1464      oe1def = SSA_NAME_DEF_STMT (oe1->op);
1465      if (!is_gimple_assign (oe1def))
1466	continue;
1467      dcode = gimple_assign_rhs_code (oe1def);
1468      if ((dcode != MULT_EXPR
1469	   && dcode != RDIV_EXPR)
1470	  || !is_reassociable_op (oe1def, dcode, loop))
1471	continue;
1472
1473      bitmap_set_bit (candidates, i);
1474      nr_candidates++;
1475    }
1476
1477  if (nr_candidates < 2)
1478    {
1479      sbitmap_free (candidates);
1480      return false;
1481    }
1482
1483  if (dump_file && (dump_flags & TDF_DETAILS))
1484    {
1485      fprintf (dump_file, "searching for un-distribute opportunities ");
1486      print_generic_expr (dump_file,
1487	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1488      fprintf (dump_file, " %d\n", nr_candidates);
1489    }
1490
1491  /* Build linearized sub-operand lists and the counting table.  */
1492  cvec.create (0);
1493
1494  hash_table<oecount_hasher> ctable (15);
1495
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
1499  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1500  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1501    {
1502      gimple oedef;
1503      enum tree_code oecode;
1504      unsigned j;
1505
1506      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1507      oecode = gimple_assign_rhs_code (oedef);
1508      linearize_expr_tree (&subops[i], oedef,
1509			   associative_tree_code (oecode), false);
1510
1511      FOR_EACH_VEC_ELT (subops[i], j, oe1)
1512	{
1513	  oecount c;
1514	  int *slot;
1515	  int idx;
1516	  c.oecode = oecode;
1517	  c.cnt = 1;
1518	  c.id = next_oecount_id++;
1519	  c.op = oe1->op;
1520	  cvec.safe_push (c);
1521	  idx = cvec.length () + 41;
1522	  slot = ctable.find_slot (idx, INSERT);
1523	  if (!*slot)
1524	    {
1525	      *slot = idx;
1526	    }
1527	  else
1528	    {
1529	      cvec.pop ();
1530	      cvec[*slot - 42].cnt++;
1531	    }
1532	}
1533    }
1534
1535  /* Sort the counting table.  */
1536  cvec.qsort (oecount_cmp);
1537
1538  if (dump_file && (dump_flags & TDF_DETAILS))
1539    {
1540      oecount *c;
1541      fprintf (dump_file, "Candidates:\n");
1542      FOR_EACH_VEC_ELT (cvec, j, c)
1543	{
1544	  fprintf (dump_file, "  %u %s: ", c->cnt,
1545		   c->oecode == MULT_EXPR
1546		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1547	  print_generic_expr (dump_file, c->op, 0);
1548	  fprintf (dump_file, "\n");
1549	}
1550    }
1551
1552  /* Process the (operand, code) pairs in order of most occurrence.  */
1553  candidates2 = sbitmap_alloc (length);
1554  while (!cvec.is_empty ())
1555    {
1556      oecount *c = &cvec.last ();
1557      if (c->cnt < 2)
1558	break;
1559
1560      /* Now collect the operands in the outer chain that contain
1561         the common operand in their inner chain.  */
1562      bitmap_clear (candidates2);
1563      nr_candidates2 = 0;
1564      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1565	{
1566	  gimple oedef;
1567	  enum tree_code oecode;
1568	  unsigned j;
1569	  tree op = (*ops)[i]->op;
1570
1571	  /* If we undistributed in this chain already this may be
1572	     a constant.  */
1573	  if (TREE_CODE (op) != SSA_NAME)
1574	    continue;
1575
1576	  oedef = SSA_NAME_DEF_STMT (op);
1577	  oecode = gimple_assign_rhs_code (oedef);
1578	  if (oecode != c->oecode)
1579	    continue;
1580
1581	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1582	    {
1583	      if (oe1->op == c->op)
1584		{
1585		  bitmap_set_bit (candidates2, i);
1586		  ++nr_candidates2;
1587		  break;
1588		}
1589	    }
1590	}
1591
1592      if (nr_candidates2 >= 2)
1593	{
1594	  operand_entry_t oe1, oe2;
1595	  gimple prod;
1596	  int first = bitmap_first_set_bit (candidates2);
1597
1598	  /* Build the new addition chain.  */
1599	  oe1 = (*ops)[first];
1600	  if (dump_file && (dump_flags & TDF_DETAILS))
1601	    {
1602	      fprintf (dump_file, "Building (");
1603	      print_generic_expr (dump_file, oe1->op, 0);
1604	    }
1605	  zero_one_operation (&oe1->op, c->oecode, c->op);
1606	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1607	    {
1608	      gimple sum;
1609	      oe2 = (*ops)[i];
1610	      if (dump_file && (dump_flags & TDF_DETAILS))
1611		{
1612		  fprintf (dump_file, " + ");
1613		  print_generic_expr (dump_file, oe2->op, 0);
1614		}
1615	      zero_one_operation (&oe2->op, c->oecode, c->op);
1616	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1617				       oe1->op, oe2->op, opcode);
1618	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1619	      oe2->rank = 0;
1620	      oe1->op = gimple_get_lhs (sum);
1621	    }
1622
1623	  /* Apply the multiplication/division.  */
1624	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1625				    oe1->op, c->op, c->oecode);
1626	  if (dump_file && (dump_flags & TDF_DETAILS))
1627	    {
1628	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1629	      print_generic_expr (dump_file, c->op, 0);
1630	      fprintf (dump_file, "\n");
1631	    }
1632
1633	  /* Record it in the addition chain and disable further
1634	     undistribution with this op.  */
1635	  oe1->op = gimple_assign_lhs (prod);
1636	  oe1->rank = get_rank (oe1->op);
1637	  subops[first].release ();
1638
1639	  changed = true;
1640	}
1641
1642      cvec.pop ();
1643    }
1644
1645  for (i = 0; i < ops->length (); ++i)
1646    subops[i].release ();
1647  free (subops);
1648  cvec.release ();
1649  sbitmap_free (candidates);
1650  sbitmap_free (candidates2);
1651
1652  return changed;
1653}
1654
1655/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1656   expression, examine the other OPS to see if any of them are comparisons
1657   of the same values, which we may be able to combine or eliminate.
1658   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1659
1660static bool
1661eliminate_redundant_comparison (enum tree_code opcode,
1662				vec<operand_entry_t> *ops,
1663				unsigned int currindex,
1664				operand_entry_t curr)
1665{
1666  tree op1, op2;
1667  enum tree_code lcode, rcode;
1668  gimple def1, def2;
1669  int i;
1670  operand_entry_t oe;
1671
1672  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1673    return false;
1674
1675  /* Check that CURR is a comparison.  */
1676  if (TREE_CODE (curr->op) != SSA_NAME)
1677    return false;
1678  def1 = SSA_NAME_DEF_STMT (curr->op);
1679  if (!is_gimple_assign (def1))
1680    return false;
1681  lcode = gimple_assign_rhs_code (def1);
1682  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1683    return false;
1684  op1 = gimple_assign_rhs1 (def1);
1685  op2 = gimple_assign_rhs2 (def1);
1686
1687  /* Now look for a similar comparison in the remaining OPS.  */
1688  for (i = currindex + 1; ops->iterate (i, &oe); i++)
1689    {
1690      tree t;
1691
1692      if (TREE_CODE (oe->op) != SSA_NAME)
1693	continue;
1694      def2 = SSA_NAME_DEF_STMT (oe->op);
1695      if (!is_gimple_assign (def2))
1696	continue;
1697      rcode = gimple_assign_rhs_code (def2);
1698      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1699	continue;
1700
1701      /* If we got here, we have a match.  See if we can combine the
1702	 two comparisons.  */
1703      if (opcode == BIT_IOR_EXPR)
1704	t = maybe_fold_or_comparisons (lcode, op1, op2,
1705				       rcode, gimple_assign_rhs1 (def2),
1706				       gimple_assign_rhs2 (def2));
1707      else
1708	t = maybe_fold_and_comparisons (lcode, op1, op2,
1709					rcode, gimple_assign_rhs1 (def2),
1710					gimple_assign_rhs2 (def2));
1711      if (!t)
1712	continue;
1713
1714      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1715	 always give us a boolean_type_node value back.  If the original
1716	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1717	 we need to convert.  */
1718      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1719	t = fold_convert (TREE_TYPE (curr->op), t);
1720
1721      if (TREE_CODE (t) != INTEGER_CST
1722	  && !operand_equal_p (t, curr->op, 0))
1723	{
1724	  enum tree_code subcode;
1725	  tree newop1, newop2;
1726	  if (!COMPARISON_CLASS_P (t))
1727	    continue;
1728	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1730	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1731	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1732	    continue;
1733	}
1734
1735      if (dump_file && (dump_flags & TDF_DETAILS))
1736	{
1737	  fprintf (dump_file, "Equivalence: ");
1738	  print_generic_expr (dump_file, curr->op, 0);
1739	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1740	  print_generic_expr (dump_file, oe->op, 0);
1741	  fprintf (dump_file, " -> ");
1742	  print_generic_expr (dump_file, t, 0);
1743	  fprintf (dump_file, "\n");
1744	}
1745
1746      /* Now we can delete oe, as it has been subsumed by the new combined
1747         expression t.  */
1748      ops->ordered_remove (i);
1749      reassociate_stats.ops_eliminated ++;
1750
1751      /* If t is the same as curr->op, we're done.  Otherwise we must
1752	 replace curr->op with t.  Special case is if we got a constant
1753	 back, in which case we add it to the end instead of in place of
1754	 the current entry.  */
1755      if (TREE_CODE (t) == INTEGER_CST)
1756	{
1757	  ops->ordered_remove (currindex);
1758	  add_to_ops_vec (ops, t);
1759	}
1760      else if (!operand_equal_p (t, curr->op, 0))
1761	{
1762	  gimple sum;
1763	  enum tree_code subcode;
1764	  tree newop1;
1765	  tree newop2;
1766	  gcc_assert (COMPARISON_CLASS_P (t));
1767	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1768	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1769	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1770	  gcc_checking_assert (is_gimple_val (newop1)
1771			       && is_gimple_val (newop2));
1772	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1773	  curr->op = gimple_get_lhs (sum);
1774	}
1775      return true;
1776    }
1777
1778  return false;
1779}
1780
1781/* Perform various identities and other optimizations on the list of
1782   operand entries, stored in OPS.  The tree code for the binary
1783   operation between all the operands is OPCODE.  */
1784
1785static void
1786optimize_ops_list (enum tree_code opcode,
1787		   vec<operand_entry_t> *ops)
1788{
1789  unsigned int length = ops->length ();
1790  unsigned int i;
1791  operand_entry_t oe;
1792  operand_entry_t oelast = NULL;
1793  bool iterate = false;
1794
1795  if (length == 1)
1796    return;
1797
1798  oelast = ops->last ();
1799
1800  /* If the last two are constants, pop the constants off, merge them
1801     and try the next two.  */
1802  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1803    {
1804      operand_entry_t oelm1 = (*ops)[length - 2];
1805
1806      if (oelm1->rank == 0
1807	  && is_gimple_min_invariant (oelm1->op)
1808	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1809				       TREE_TYPE (oelast->op)))
1810	{
1811	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1812				     oelm1->op, oelast->op);
1813
1814	  if (folded && is_gimple_min_invariant (folded))
1815	    {
1816	      if (dump_file && (dump_flags & TDF_DETAILS))
1817		fprintf (dump_file, "Merging constants\n");
1818
1819	      ops->pop ();
1820	      ops->pop ();
1821
1822	      add_to_ops_vec (ops, folded);
1823	      reassociate_stats.constants_eliminated++;
1824
1825	      optimize_ops_list (opcode, ops);
1826	      return;
1827	    }
1828	}
1829    }
1830
1831  eliminate_using_constants (opcode, ops);
1832  oelast = NULL;
1833
1834  for (i = 0; ops->iterate (i, &oe);)
1835    {
1836      bool done = false;
1837
1838      if (eliminate_not_pairs (opcode, ops, i, oe))
1839	return;
1840      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1841	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1842	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1843	{
1844	  if (done)
1845	    return;
1846	  iterate = true;
1847	  oelast = NULL;
1848	  continue;
1849	}
1850      oelast = oe;
1851      i++;
1852    }
1853
1854  length = ops->length ();
1855  oelast = ops->last ();
1856
1857  if (iterate)
1858    optimize_ops_list (opcode, ops);
1859}
1860
1861/* The following functions are subroutines to optimize_range_tests and allow
1862   it to try to change a logical combination of comparisons into a range
1863   test.
1864
1865   For example, both
1866	X == 2 || X == 5 || X == 3 || X == 4
1867   and
1868	X >= 2 && X <= 5
1869   are converted to
1870	(unsigned) (X - 2) <= 3
1871
1872   For more information see the comments above fold_range_test in
1873   fold-const.c; this implementation is for GIMPLE.  */
1874
1875struct range_entry
1876{
1877  tree exp;
1878  tree low;
1879  tree high;
1880  bool in_p;
1881  bool strict_overflow_p;
1882  unsigned int idx, next;
1883};
1884
1885/* This is similar to make_range in fold-const.c, but on top of
1886   GIMPLE instead of trees.  If EXP is non-NULL, it should be
1887   an SSA_NAME and the STMT argument is ignored; otherwise STMT
1888   should be a GIMPLE_COND.  */
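
/* For illustration (hypothetical SSA names): if EXP is _5 and its
   defining statement is _5 = x_3 > 10, the recorded entry describes
   x_3 being in the range [11, +INF], i.e. exp = x_3 with low, high
   and in_p encoding that range.  */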
1889
1890static void
1891init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1892{
1893  int in_p;
1894  tree low, high;
1895  bool is_bool, strict_overflow_p;
1896
1897  r->exp = NULL_TREE;
1898  r->in_p = false;
1899  r->strict_overflow_p = false;
1900  r->low = NULL_TREE;
1901  r->high = NULL_TREE;
1902  if (exp != NULL_TREE
1903      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1904    return;
1905
1906  /* Start with simply saying "EXP != 0" and then look at the code of EXP
1907     and see if we can refine the range.  Some of the cases below may not
1908     happen, but it doesn't seem worth worrying about this.  We "continue"
1909     the outer loop when we've changed something; otherwise we "break"
1910     the switch, which will "break" the while.  */
1911  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1912  high = low;
1913  in_p = 0;
1914  strict_overflow_p = false;
1915  is_bool = false;
1916  if (exp == NULL_TREE)
1917    is_bool = true;
1918  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1919    {
1920      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1921	is_bool = true;
1922      else
1923	return;
1924    }
1925  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1926    is_bool = true;
1927
1928  while (1)
1929    {
1930      enum tree_code code;
1931      tree arg0, arg1, exp_type;
1932      tree nexp;
1933      location_t loc;
1934
1935      if (exp != NULL_TREE)
1936	{
1937	  if (TREE_CODE (exp) != SSA_NAME
1938	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1939	    break;
1940
1941	  stmt = SSA_NAME_DEF_STMT (exp);
1942	  if (!is_gimple_assign (stmt))
1943	    break;
1944
1945	  code = gimple_assign_rhs_code (stmt);
1946	  arg0 = gimple_assign_rhs1 (stmt);
1947	  arg1 = gimple_assign_rhs2 (stmt);
1948	  exp_type = TREE_TYPE (exp);
1949	}
1950      else
1951	{
1952	  code = gimple_cond_code (stmt);
1953	  arg0 = gimple_cond_lhs (stmt);
1954	  arg1 = gimple_cond_rhs (stmt);
1955	  exp_type = boolean_type_node;
1956	}
1957
1958      if (TREE_CODE (arg0) != SSA_NAME)
1959	break;
1960      loc = gimple_location (stmt);
1961      switch (code)
1962	{
1963	case BIT_NOT_EXPR:
1964	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1965	      /* Ensure the range is either +[-,0], +[0,0],
1966		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1967		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
1968		 or similar expression of unconditional true or
1969		 false, it should not be negated.  */
1970	      && ((high && integer_zerop (high))
1971		  || (low && integer_onep (low))))
1972	    {
1973	      in_p = !in_p;
1974	      exp = arg0;
1975	      continue;
1976	    }
1977	  break;
1978	case SSA_NAME:
1979	  exp = arg0;
1980	  continue;
1981	CASE_CONVERT:
1982	  if (is_bool)
1983	    goto do_default;
1984	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1985	    {
1986	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1987		is_bool = true;
1988	      else
1989		return;
1990	    }
1991	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1992	    is_bool = true;
1993	  goto do_default;
1994	case EQ_EXPR:
1995	case NE_EXPR:
1996	case LT_EXPR:
1997	case LE_EXPR:
1998	case GE_EXPR:
1999	case GT_EXPR:
2000	  is_bool = true;
2001	  /* FALLTHRU */
2002	default:
2003	  if (!is_bool)
2004	    return;
2005	do_default:
2006	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2007				  &low, &high, &in_p,
2008				  &strict_overflow_p);
2009	  if (nexp != NULL_TREE)
2010	    {
2011	      exp = nexp;
2012	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2013	      continue;
2014	    }
2015	  break;
2016	}
2017      break;
2018    }
2019  if (is_bool)
2020    {
2021      r->exp = exp;
2022      r->in_p = in_p;
2023      r->low = low;
2024      r->high = high;
2025      r->strict_overflow_p = strict_overflow_p;
2026    }
2027}
2028
2029/* Comparison function for qsort.  Sort entries
2030   without an SSA_NAME exp first, then entries with SSA_NAMEs sorted
2031   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2032   by increasing ->low and, if ->low is the same, by increasing
2033   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2034   means maximum.  */
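
/* For example, two entries for the same SSA_NAME x_1 with ranges
   [2, 2] and [4, 5] end up adjacent after sorting, which is what the
   merging loop in optimize_range_tests relies on.  */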
2035
2036static int
2037range_entry_cmp (const void *a, const void *b)
2038{
2039  const struct range_entry *p = (const struct range_entry *) a;
2040  const struct range_entry *q = (const struct range_entry *) b;
2041
2042  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2043    {
2044      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2045	{
2046	  /* Group range_entries for the same SSA_NAME together.  */
2047	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2048	    return -1;
2049	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2050	    return 1;
2051	  /* If ->low is different, NULL low goes first, then by
2052	     ascending low.  */
2053	  if (p->low != NULL_TREE)
2054	    {
2055	      if (q->low != NULL_TREE)
2056		{
2057		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2058					  p->low, q->low);
2059		  if (tem && integer_onep (tem))
2060		    return -1;
2061		  tem = fold_binary (GT_EXPR, boolean_type_node,
2062				     p->low, q->low);
2063		  if (tem && integer_onep (tem))
2064		    return 1;
2065		}
2066	      else
2067		return 1;
2068	    }
2069	  else if (q->low != NULL_TREE)
2070	    return -1;
2071	  /* If ->high is different, NULL high goes last, before that by
2072	     ascending high.  */
2073	  if (p->high != NULL_TREE)
2074	    {
2075	      if (q->high != NULL_TREE)
2076		{
2077		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2078					  p->high, q->high);
2079		  if (tem && integer_onep (tem))
2080		    return -1;
2081		  tem = fold_binary (GT_EXPR, boolean_type_node,
2082				     p->high, q->high);
2083		  if (tem && integer_onep (tem))
2084		    return 1;
2085		}
2086	      else
2087		return -1;
2088	    }
2089	  else if (q->high != NULL_TREE)
2090	    return 1;
2091	  /* If both ranges are the same, sort below by ascending idx.  */
2092	}
2093      else
2094	return 1;
2095    }
2096  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2097    return -1;
2098
2099  if (p->idx < q->idx)
2100    return -1;
2101  else
2102    {
2103      gcc_checking_assert (p->idx > q->idx);
2104      return 1;
2105    }
2106}
2107
2108/* Helper routine of optimize_range_tests.
2109   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2110   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2111   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2112   is NULL, OTHERRANGEP must not be NULL; in that case OTHERRANGEP
2113   points to an array of COUNT pointers to other ranges.  Return
2114   true if the range merge has been successful.
2115   If OPCODE is ERROR_MARK, this is called from within
2116   maybe_optimize_range_tests and is performing inter-bb range optimization.
2117   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2118   oe->rank.  */
2119
2120static bool
2121update_range_test (struct range_entry *range, struct range_entry *otherrange,
2122		   struct range_entry **otherrangep,
2123		   unsigned int count, enum tree_code opcode,
2124		   vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2125		   bool in_p, tree low, tree high, bool strict_overflow_p)
2126{
2127  operand_entry_t oe = (*ops)[range->idx];
2128  tree op = oe->op;
2129  gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2130    last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2131  location_t loc = gimple_location (stmt);
2132  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2133  tree tem = build_range_check (loc, optype, unshare_expr (exp),
2134				in_p, low, high);
2135  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2136  gimple_stmt_iterator gsi;
2137  unsigned int i, uid;
2138
2139  if (tem == NULL_TREE)
2140    return false;
2141
2142  /* If OP is a default definition SSA_NAME, there is no place to insert
2143     the new comparison.  Give up, unless we can use OP itself as the
2144     range test.  */
2145  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2146    {
2147      if (op == range->exp
2148	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2149	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2150	  && (op == tem
2151	      || (TREE_CODE (tem) == EQ_EXPR
2152		  && TREE_OPERAND (tem, 0) == op
2153		  && integer_onep (TREE_OPERAND (tem, 1))))
2154	  && opcode != BIT_IOR_EXPR
2155	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2156	{
2157	  stmt = NULL;
2158	  tem = op;
2159	}
2160      else
2161	return false;
2162    }
2163
2164  if (strict_overflow_p && issue_strict_overflow_warning (wc))
2165    warning_at (loc, OPT_Wstrict_overflow,
2166		"assuming signed overflow does not occur "
2167		"when simplifying range test");
2168
2169  if (dump_file && (dump_flags & TDF_DETAILS))
2170    {
2171      struct range_entry *r;
2172      fprintf (dump_file, "Optimizing range tests ");
2173      print_generic_expr (dump_file, range->exp, 0);
2174      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2175      print_generic_expr (dump_file, range->low, 0);
2176      fprintf (dump_file, ", ");
2177      print_generic_expr (dump_file, range->high, 0);
2178      fprintf (dump_file, "]");
2179      for (i = 0; i < count; i++)
2180	{
2181	  if (otherrange)
2182	    r = otherrange + i;
2183	  else
2184	    r = otherrangep[i];
2185	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2186	  print_generic_expr (dump_file, r->low, 0);
2187	  fprintf (dump_file, ", ");
2188	  print_generic_expr (dump_file, r->high, 0);
2189	  fprintf (dump_file, "]");
2190	}
2191      fprintf (dump_file, "\n into ");
2192      print_generic_expr (dump_file, tem, 0);
2193      fprintf (dump_file, "\n");
2194    }
2195
2196  if (opcode == BIT_IOR_EXPR
2197      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2198    tem = invert_truthvalue_loc (loc, tem);
2199
2200  tem = fold_convert_loc (loc, optype, tem);
2201  if (stmt)
2202    {
2203      gsi = gsi_for_stmt (stmt);
2204      uid = gimple_uid (stmt);
2205    }
2206  else
2207    {
2208      gsi = gsi_none ();
2209      uid = 0;
2210    }
2211  if (stmt == NULL)
2212    gcc_checking_assert (tem == op);
2213  /* In rare cases range->exp can be equal to the lhs of stmt.
2214     In that case we have to insert after the stmt rather than before
2215     it.  If stmt is a PHI, insert it at the start of the basic block.  */
2216  else if (op != range->exp)
2217    {
2218      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2219      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2220				      GSI_SAME_STMT);
2221      gsi_prev (&gsi);
2222    }
2223  else if (gimple_code (stmt) != GIMPLE_PHI)
2224    {
2225      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2226      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2227				      GSI_CONTINUE_LINKING);
2228    }
2229  else
2230    {
2231      gsi = gsi_after_labels (gimple_bb (stmt));
2232      if (!gsi_end_p (gsi))
2233	uid = gimple_uid (gsi_stmt (gsi));
2234      else
2235	{
2236	  gsi = gsi_start_bb (gimple_bb (stmt));
2237	  uid = 1;
2238	  while (!gsi_end_p (gsi))
2239	    {
2240	      uid = gimple_uid (gsi_stmt (gsi));
2241	      gsi_next (&gsi);
2242	    }
2243	}
2244      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2245      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2246				      GSI_SAME_STMT);
2247      if (gsi_end_p (gsi))
2248	gsi = gsi_last_bb (gimple_bb (stmt));
2249      else
2250	gsi_prev (&gsi);
2251    }
2252  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2253    if (gimple_uid (gsi_stmt (gsi)))
2254      break;
2255    else
2256      gimple_set_uid (gsi_stmt (gsi), uid);
2257
2258  oe->op = tem;
2259  range->exp = exp;
2260  range->low = low;
2261  range->high = high;
2262  range->in_p = in_p;
2263  range->strict_overflow_p = false;
2264
2265  for (i = 0; i < count; i++)
2266    {
2267      if (otherrange)
2268	range = otherrange + i;
2269      else
2270	range = otherrangep[i];
2271      oe = (*ops)[range->idx];
2272      /* Now change all the other range test immediate uses, so that
2273	 those tests will be optimized away.  */
2274      if (opcode == ERROR_MARK)
2275	{
2276	  if (oe->op)
2277	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2278				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2279	  else
2280	    oe->op = (oe->rank == BIT_IOR_EXPR
2281		      ? boolean_false_node : boolean_true_node);
2282	}
2283      else
2284	oe->op = error_mark_node;
2285      range->exp = NULL_TREE;
2286    }
2287  return true;
2288}
2289
2290/* Optimize X == CST1 || X == CST2
2291   if popcount (CST1 ^ CST2) == 1 into
2292   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2293   Similarly for ranges.  E.g.
2294   X != 2 && X != 3 && X != 10 && X != 11
2295   will be transformed by the previous optimization into
2296   !((X - 2U) <= 1U || (X - 10U) <= 1U)
2297   and this loop can transform that into
2298   !(((X & ~8) - 2U) <= 1U).  */
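
/* A worked instance of the first form above (hypothetical values): for
   X == 43 || X == 47 we have CST1 ^ CST2 == 4, whose popcount is 1, so
   the test becomes (X & ~4) == 43, a single comparison.  */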
2299
2300static bool
2301optimize_range_tests_xor (enum tree_code opcode, tree type,
2302			  tree lowi, tree lowj, tree highi, tree highj,
2303			  vec<operand_entry_t> *ops,
2304			  struct range_entry *rangei,
2305			  struct range_entry *rangej)
2306{
2307  tree lowxor, highxor, tem, exp;
2308  /* Check lowi ^ lowj == highi ^ highj and
2309     popcount (lowi ^ lowj) == 1.  */
2310  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2311  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2312    return false;
2313  if (!integer_pow2p (lowxor))
2314    return false;
2315  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2316  if (!tree_int_cst_equal (lowxor, highxor))
2317    return false;
2318
2319  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2320  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2321  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2322  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2323  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2324			 NULL, rangei->in_p, lowj, highj,
2325			 rangei->strict_overflow_p
2326			 || rangej->strict_overflow_p))
2327    return true;
2328  return false;
2329}
2330
2331/* Optimize X == CST1 || X == CST2
2332   if popcount (CST2 - CST1) == 1 into
2333   ((X - CST1) & ~(CST2 - CST1)) == 0.
2334   Similarly for ranges.  E.g.
2335   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2336   || X == 75 || X == 45
2337   will be transformed by the previous optimization into
2338   (X - 43U) <= 3U || (X - 75U) <= 3U
2339   and this loop can transform that into
2340   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
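/* In the example above 75U - 43U == 32U, a power of two, which is what
   the popcount (lowj - lowi) == 1 check below requires.  */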
2341static bool
2342optimize_range_tests_diff (enum tree_code opcode, tree type,
2343			    tree lowi, tree lowj, tree highi, tree highj,
2344			    vec<operand_entry_t> *ops,
2345			    struct range_entry *rangei,
2346			    struct range_entry *rangej)
2347{
2348  tree tem1, tem2, mask;
2349  /* Check highi - lowi == highj - lowj.  */
2350  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2351  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2352    return false;
2353  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2354  if (!tree_int_cst_equal (tem1, tem2))
2355    return false;
2356  /* Check popcount (lowj - lowi) == 1.  */
2357  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2358  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2359    return false;
2360  if (!integer_pow2p (tem1))
2361    return false;
2362
2363  type = unsigned_type_for (type);
2364  tem1 = fold_convert (type, tem1);
2365  tem2 = fold_convert (type, tem2);
2366  lowi = fold_convert (type, lowi);
2367  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2368  tem1 = fold_binary (MINUS_EXPR, type,
2369		      fold_convert (type, rangei->exp), lowi);
2370  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2371  lowj = build_int_cst (type, 0);
2372  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2373			 NULL, rangei->in_p, lowj, tem2,
2374			 rangei->strict_overflow_p
2375			 || rangej->strict_overflow_p))
2376    return true;
2377  return false;
2378}
2379
2380/* Perform the checks common to optimize_range_tests_xor and
2381   optimize_range_tests_diff.
2382   If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
2383   otherwise call optimize_range_tests_diff.  */
2384
2385static bool
2386optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2387			bool optimize_xor, vec<operand_entry_t> *ops,
2388			struct range_entry *ranges)
2389{
2390  int i, j;
2391  bool any_changes = false;
2392  for (i = first; i < length; i++)
2393    {
2394      tree lowi, highi, lowj, highj, type, tem;
2395
2396      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2397	continue;
2398      type = TREE_TYPE (ranges[i].exp);
2399      if (!INTEGRAL_TYPE_P (type))
2400	continue;
2401      lowi = ranges[i].low;
2402      if (lowi == NULL_TREE)
2403	lowi = TYPE_MIN_VALUE (type);
2404      highi = ranges[i].high;
2405      if (highi == NULL_TREE)
2406	continue;
2407      for (j = i + 1; j < length && j < i + 64; j++)
2408	{
2409	  bool changes;
2410	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2411	    continue;
2412	  lowj = ranges[j].low;
2413	  if (lowj == NULL_TREE)
2414	    continue;
2415	  highj = ranges[j].high;
2416	  if (highj == NULL_TREE)
2417	    highj = TYPE_MAX_VALUE (type);
2418	  /* Check lowj > highi.  */
2419	  tem = fold_binary (GT_EXPR, boolean_type_node,
2420			     lowj, highi);
2421	  if (tem == NULL_TREE || !integer_onep (tem))
2422	    continue;
2423	  if (optimize_xor)
2424	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2425						highi, highj, ops,
2426						ranges + i, ranges + j);
2427	  else
2428	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2429						 highi, highj, ops,
2430						 ranges + i, ranges + j);
2431	  if (changes)
2432	    {
2433	      any_changes = true;
2434	      break;
2435	    }
2436	}
2437    }
2438  return any_changes;
2439}
2440
2441/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2442   range, EXP, LOW, HIGH, compute the bit mask of bits to test in *MASK,
2443   and return the (possibly adjusted) EXP on success, NULL otherwise.  */
2444
2445static tree
2446extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2447		       wide_int *mask, tree *totallowp)
2448{
2449  tree tem = int_const_binop (MINUS_EXPR, high, low);
2450  if (tem == NULL_TREE
2451      || TREE_CODE (tem) != INTEGER_CST
2452      || TREE_OVERFLOW (tem)
2453      || tree_int_cst_sgn (tem) == -1
2454      || compare_tree_int (tem, prec) != -1)
2455    return NULL_TREE;
2456
2457  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2458  *mask = wi::shifted_mask (0, max, false, prec);
2459  if (TREE_CODE (exp) == BIT_AND_EXPR
2460      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2461    {
2462      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2463      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2464      if (wi::popcount (msk) == 1
2465	  && wi::ltu_p (msk, prec - max))
2466	{
2467	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2468	  max += msk.to_uhwi ();
2469	  exp = TREE_OPERAND (exp, 0);
2470	  if (integer_zerop (low)
2471	      && TREE_CODE (exp) == PLUS_EXPR
2472	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2473	    {
2474	      tree ret = TREE_OPERAND (exp, 0);
2475	      STRIP_NOPS (ret);
2476	      widest_int bias
2477	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2478				     TYPE_PRECISION (TREE_TYPE (low))));
2479	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2480	      if (totallowp)
2481		{
2482		  *totallowp = tbias;
2483		  return ret;
2484		}
2485	      else if (!tree_int_cst_lt (totallow, tbias))
2486		return NULL_TREE;
2487	      bias = wi::to_widest (tbias);
2488	      bias -= wi::to_widest (totallow);
2489	      if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2490		{
2491		  *mask = wi::lshift (*mask, bias);
2492		  return ret;
2493		}
2494	    }
2495	}
2496    }
2497  if (totallowp)
2498    return exp;
2499  if (!tree_int_cst_lt (totallow, low))
2500    return exp;
2501  tem = int_const_binop (MINUS_EXPR, low, totallow);
2502  if (tem == NULL_TREE
2503      || TREE_CODE (tem) != INTEGER_CST
2504      || TREE_OVERFLOW (tem)
2505      || compare_tree_int (tem, prec - max) == 1)
2506    return NULL_TREE;
2507
2508  *mask = wi::lshift (*mask, wi::to_widest (tem));
2509  return exp;
2510}
2511
2512/* Attempt to optimize small range tests using a bit test.
2513   E.g.
2514   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2515   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2516   has been optimized by earlier optimizations into:
2517   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2518   As the whole 43 through 82 range spans fewer than 64 values,
2519   for 64-bit word targets optimize that into:
2520   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
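
/* In the example above the mask 0x8F0000004FULL has bits 0-3, 6, 32-35
   and 39 set, i.e. one bit for each of the ten tested values minus 43.  */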
2521
2522static bool
2523optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2524				  vec<operand_entry_t> *ops,
2525				  struct range_entry *ranges)
2526{
2527  int i, j;
2528  bool any_changes = false;
2529  int prec = GET_MODE_BITSIZE (word_mode);
2530  auto_vec<struct range_entry *, 64> candidates;
2531
2532  for (i = first; i < length - 2; i++)
2533    {
2534      tree lowi, highi, lowj, highj, type;
2535
2536      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2537	continue;
2538      type = TREE_TYPE (ranges[i].exp);
2539      if (!INTEGRAL_TYPE_P (type))
2540	continue;
2541      lowi = ranges[i].low;
2542      if (lowi == NULL_TREE)
2543	lowi = TYPE_MIN_VALUE (type);
2544      highi = ranges[i].high;
2545      if (highi == NULL_TREE)
2546	continue;
2547      wide_int mask;
2548      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2549					highi, &mask, &lowi);
2550      if (exp == NULL_TREE)
2551	continue;
2552      bool strict_overflow_p = ranges[i].strict_overflow_p;
2553      candidates.truncate (0);
2554      int end = MIN (i + 64, length);
2555      for (j = i + 1; j < end; j++)
2556	{
2557	  tree exp2;
2558	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2559	    continue;
2560	  if (ranges[j].exp == exp)
2561	    ;
2562	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2563	    {
2564	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
2565	      if (exp2 == exp)
2566		;
2567	      else if (TREE_CODE (exp2) == PLUS_EXPR)
2568		{
2569		  exp2 = TREE_OPERAND (exp2, 0);
2570		  STRIP_NOPS (exp2);
2571		  if (exp2 != exp)
2572		    continue;
2573		}
2574	      else
2575		continue;
2576	    }
2577	  else
2578	    continue;
2579	  lowj = ranges[j].low;
2580	  if (lowj == NULL_TREE)
2581	    continue;
2582	  highj = ranges[j].high;
2583	  if (highj == NULL_TREE)
2584	    highj = TYPE_MAX_VALUE (type);
2585	  wide_int mask2;
2586	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2587					highj, &mask2, NULL);
2588	  if (exp2 != exp)
2589	    continue;
2590	  mask |= mask2;
2591	  strict_overflow_p |= ranges[j].strict_overflow_p;
2592	  candidates.safe_push (&ranges[j]);
2593	}
2594
2595      /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2596      if (candidates.length () >= 2)
2597	{
2598	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
2599					wi::to_widest (lowi)
2600					+ prec - 1 - wi::clz (mask));
2601	  operand_entry_t oe = (*ops)[ranges[i].idx];
2602	  tree op = oe->op;
2603	  gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2604			   : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2605	  location_t loc = gimple_location (stmt);
2606	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2607
2608	  /* See if it is cheaper to pretend the minimum value of the
2609	     range is 0, if the maximum value is small enough.
2610	     We can then avoid the subtraction of the minimum value, but the
2611	     mask constant could perhaps be more expensive.  */
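	  /* For instance, with lowi == 1 this compares the cost of
	     computing (X - 1) & mask against that of X & (mask << 1) and
	     keeps whichever form is cheaper on the target.  */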
2612	  if (compare_tree_int (lowi, 0) > 0
2613	      && compare_tree_int (high, prec) < 0)
2614	    {
2615	      int cost_diff;
2616	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
2617	      rtx reg = gen_raw_REG (word_mode, 10000);
2618	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2619	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2620						      GEN_INT (-m)), speed_p);
2621	      rtx r = immed_wide_int_const (mask, word_mode);
2622	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2623					 speed_p);
2624	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2625	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2626					 speed_p);
2627	      if (cost_diff > 0)
2628		{
2629		  mask = wi::lshift (mask, m);
2630		  lowi = build_zero_cst (TREE_TYPE (lowi));
2631		}
2632	    }
2633
2634	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
2635					false, lowi, high);
2636	  if (tem == NULL_TREE || is_gimple_val (tem))
2637	    continue;
2638	  tree etype = unsigned_type_for (TREE_TYPE (exp));
2639	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2640				 fold_convert_loc (loc, etype, exp),
2641				 fold_convert_loc (loc, etype, lowi));
2642	  exp = fold_convert_loc (loc, integer_type_node, exp);
2643	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2644	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2645				 build_int_cst (word_type, 1), exp);
2646	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2647				 wide_int_to_tree (word_type, mask));
2648	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2649				 build_zero_cst (word_type));
2650	  if (is_gimple_val (exp))
2651	    continue;
2652
2653	  /* The shift might have undefined behavior if TEM is true,
2654	     but reassociate_bb isn't prepared to have basic blocks
2655	     split when it is running.  So, temporarily emit code
2656	     with BIT_IOR_EXPR instead of &&, and fix it up in
2657	     branch_fixup.  */
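	  /* TEM is also recorded in reassoc_branch_fixups below; the
	     intent is that the later fixup makes the bit test conditional
	     on TEM, so the shift is not evaluated when TEM is already true.  */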
2658	  gimple_seq seq;
2659	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2660	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
2661	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2662	  gimple_seq seq2;
2663	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2664	  gimple_seq_add_seq_without_update (&seq, seq2);
2665	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
2666	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2667	  gimple g = gimple_build_assign (make_ssa_name (optype),
2668					  BIT_IOR_EXPR, tem, exp);
2669	  gimple_set_location (g, loc);
2670	  gimple_seq_add_stmt_without_update (&seq, g);
2671	  exp = gimple_assign_lhs (g);
2672	  tree val = build_zero_cst (optype);
2673	  if (update_range_test (&ranges[i], NULL, candidates.address (),
2674				 candidates.length (), opcode, ops, exp,
2675				 seq, false, val, val, strict_overflow_p))
2676	    {
2677	      any_changes = true;
2678	      reassoc_branch_fixups.safe_push (tem);
2679	    }
2680	  else
2681	    gimple_seq_discard (seq);
2682	}
2683    }
2684  return any_changes;
2685}
2686
2687/* Optimize range tests, similarly to how fold_range_test optimizes
2688   them on trees.  The tree code for the binary
2689   operation between all the operands is OPCODE.
2690   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2691   maybe_optimize_range_tests for inter-bb range optimization.
2692   In that case if oe->op is NULL, oe->id is bb->index whose
2693   GIMPLE_COND is && or ||ed into the test, and oe->rank says
2694   the actual opcode.  */
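
/* The function below proceeds in stages: ranges for the same SSA_NAME
   that merge_ranges can combine are merged first, then the XOR and
   difference tricks above are tried pairwise, and finally, when left
   shifts are cheap, clusters of small ranges are turned into bit tests.  */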
2695
2696static bool
2697optimize_range_tests (enum tree_code opcode,
2698		      vec<operand_entry_t> *ops)
2699{
2700  unsigned int length = ops->length (), i, j, first;
2701  operand_entry_t oe;
2702  struct range_entry *ranges;
2703  bool any_changes = false;
2704
2705  if (length == 1)
2706    return false;
2707
2708  ranges = XNEWVEC (struct range_entry, length);
2709  for (i = 0; i < length; i++)
2710    {
2711      oe = (*ops)[i];
2712      ranges[i].idx = i;
2713      init_range_entry (ranges + i, oe->op,
2714			oe->op ? NULL :
2715			  last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2716      /* For |, invert it now; we will invert it again before emitting
2717	 the optimized expression.  */
2718      if (opcode == BIT_IOR_EXPR
2719	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2720	ranges[i].in_p = !ranges[i].in_p;
2721    }
2722
2723  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2724  for (i = 0; i < length; i++)
2725    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2726      break;
2727
2728  /* Try to merge ranges.  */
2729  for (first = i; i < length; i++)
2730    {
2731      tree low = ranges[i].low;
2732      tree high = ranges[i].high;
2733      int in_p = ranges[i].in_p;
2734      bool strict_overflow_p = ranges[i].strict_overflow_p;
2735      int update_fail_count = 0;
2736
2737      for (j = i + 1; j < length; j++)
2738	{
2739	  if (ranges[i].exp != ranges[j].exp)
2740	    break;
2741	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2742			     ranges[j].in_p, ranges[j].low, ranges[j].high))
2743	    break;
2744	  strict_overflow_p |= ranges[j].strict_overflow_p;
2745	}
2746
2747      if (j == i + 1)
2748	continue;
2749
2750      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2751			     opcode, ops, ranges[i].exp, NULL, in_p,
2752			     low, high, strict_overflow_p))
2753	{
2754	  i = j - 1;
2755	  any_changes = true;
2756	}
2757      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2758	 while update_range_test would fail.  */
2759      else if (update_fail_count == 64)
2760	i = j - 1;
2761      else
2762	++update_fail_count;
2763    }
2764
2765  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2766					 ops, ranges);
2767
2768  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2769    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2770					   ops, ranges);
2771  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2772    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2773						     ops, ranges);
2774
2775  if (any_changes && opcode != ERROR_MARK)
2776    {
2777      j = 0;
2778      FOR_EACH_VEC_ELT (*ops, i, oe)
2779	{
2780	  if (oe->op == error_mark_node)
2781	    continue;
2782	  else if (i != j)
2783	    (*ops)[j] = oe;
2784	  j++;
2785	}
2786      ops->truncate (j);
2787    }
2788
2789  XDELETEVEC (ranges);
2790  return any_changes;
2791}
2792
2793/* Return true if STMT is a cast like:
2794   <bb N>:
2795   ...
2796   _123 = (int) _234;
2797
2798   <bb M>:
2799   # _345 = PHI <_123(N), 1(...), 1(...)>
2800   where _234 has bool type, _123 has a single use and
2801   bb N has a single successor M.  This is commonly used in
2802   the last block of a range test.  */
2803
2804static bool
2805final_range_test_p (gimple stmt)
2806{
2807  basic_block bb, rhs_bb;
2808  edge e;
2809  tree lhs, rhs;
2810  use_operand_p use_p;
2811  gimple use_stmt;
2812
2813  if (!gimple_assign_cast_p (stmt))
2814    return false;
2815  bb = gimple_bb (stmt);
2816  if (!single_succ_p (bb))
2817    return false;
2818  e = single_succ_edge (bb);
2819  if (e->flags & EDGE_COMPLEX)
2820    return false;
2821
2822  lhs = gimple_assign_lhs (stmt);
2823  rhs = gimple_assign_rhs1 (stmt);
2824  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2825      || TREE_CODE (rhs) != SSA_NAME
2826      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2827    return false;
2828
2829  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2830  if (!single_imm_use (lhs, &use_p, &use_stmt))
2831    return false;
2832
2833  if (gimple_code (use_stmt) != GIMPLE_PHI
2834      || gimple_bb (use_stmt) != e->dest)
2835    return false;
2836
2837  /* And that the rhs is defined in the same loop.  */
2838  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2839  if (rhs_bb == NULL
2840      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2841    return false;
2842
2843  return true;
2844}
2845
2846/* Return true if BB is a suitable basic block for inter-bb range test
2847   optimization.  If BACKWARD is true, BB should be the only predecessor
2848   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2849   or is compared against to find a common basic block to which all
2850   conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
2851   should be the only predecessor of BB.  */
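
/* A typical shape handled here (hypothetical block numbers and names):

     <bb 2>:  if (a_1 > 5) goto <bb 3>; else goto <bb 5>;
     <bb 3>:  if (b_2 != 0) goto <bb 4>; else goto <bb 5>;
     <bb 4>:  ...
     <bb 5>:  # c_3 = PHI <0(2), 0(3), ...>

   where <bb 5> acts as *OTHER_BB for both conditions and the PHI
   arguments on the edges from <bb 2> and <bb 3> match.  */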
2852
2853static bool
2854suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2855		  bool backward)
2856{
2857  edge_iterator ei, ei2;
2858  edge e, e2;
2859  gimple stmt;
2860  gphi_iterator gsi;
2861  bool other_edge_seen = false;
2862  bool is_cond;
2863
2864  if (test_bb == bb)
2865    return false;
2866  /* Check last stmt first.  */
2867  stmt = last_stmt (bb);
2868  if (stmt == NULL
2869      || (gimple_code (stmt) != GIMPLE_COND
2870	  && (backward || !final_range_test_p (stmt)))
2871      || gimple_visited_p (stmt)
2872      || stmt_could_throw_p (stmt)
2873      || *other_bb == bb)
2874    return false;
2875  is_cond = gimple_code (stmt) == GIMPLE_COND;
2876  if (is_cond)
2877    {
2878      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2879	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2880	 to *OTHER_BB (if not set yet, try to find it out).  */
2881      if (EDGE_COUNT (bb->succs) != 2)
2882	return false;
2883      FOR_EACH_EDGE (e, ei, bb->succs)
2884	{
2885	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2886	    return false;
2887	  if (e->dest == test_bb)
2888	    {
2889	      if (backward)
2890		continue;
2891	      else
2892		return false;
2893	    }
2894	  if (e->dest == bb)
2895	    return false;
2896	  if (*other_bb == NULL)
2897	    {
2898	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2899		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2900		  return false;
2901		else if (e->dest == e2->dest)
2902		  *other_bb = e->dest;
2903	      if (*other_bb == NULL)
2904		return false;
2905	    }
2906	  if (e->dest == *other_bb)
2907	    other_edge_seen = true;
2908	  else if (backward)
2909	    return false;
2910	}
2911      if (*other_bb == NULL || !other_edge_seen)
2912	return false;
2913    }
2914  else if (single_succ (bb) != *other_bb)
2915    return false;
2916
2917  /* Now check all PHIs of *OTHER_BB.  */
2918  e = find_edge (bb, *other_bb);
2919  e2 = find_edge (test_bb, *other_bb);
2920  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2921    {
2922      gphi *phi = gsi.phi ();
2923      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2924	 corresponding to BB and TEST_BB predecessor must be the same.  */
2925      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2926			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
2927	{
2928	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2929	     one of the PHIs should have the lhs of the last stmt in
2930	     that block as PHI arg and that PHI should have 0 or 1
2931	     corresponding to it in all other range test basic blocks
2932	     considered.  */
2933	  if (!is_cond)
2934	    {
2935	      if (gimple_phi_arg_def (phi, e->dest_idx)
2936		  == gimple_assign_lhs (stmt)
2937		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2938		      || integer_onep (gimple_phi_arg_def (phi,
2939							   e2->dest_idx))))
2940		continue;
2941	    }
2942	  else
2943	    {
2944	      gimple test_last = last_stmt (test_bb);
2945	      if (gimple_code (test_last) != GIMPLE_COND
2946		  && gimple_phi_arg_def (phi, e2->dest_idx)
2947		     == gimple_assign_lhs (test_last)
2948		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2949		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2950		continue;
2951	    }
2952
2953	  return false;
2954	}
2955    }
2956  return true;
2957}
2958
2959/* Return true if BB doesn't have side-effects that would disallow
2960   range test optimization, all SSA_NAMEs set in the bb are consumed
2961   in the bb and there are no PHIs.  */
2962
2963static bool
2964no_side_effect_bb (basic_block bb)
2965{
2966  gimple_stmt_iterator gsi;
2967  gimple last;
2968
2969  if (!gimple_seq_empty_p (phi_nodes (bb)))
2970    return false;
2971  last = last_stmt (bb);
2972  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2973    {
2974      gimple stmt = gsi_stmt (gsi);
2975      tree lhs;
2976      imm_use_iterator imm_iter;
2977      use_operand_p use_p;
2978
2979      if (is_gimple_debug (stmt))
2980	continue;
2981      if (gimple_has_side_effects (stmt))
2982	return false;
2983      if (stmt == last)
2984	return true;
2985      if (!is_gimple_assign (stmt))
2986	return false;
2987      lhs = gimple_assign_lhs (stmt);
2988      if (TREE_CODE (lhs) != SSA_NAME)
2989	return false;
2990      if (gimple_assign_rhs_could_trap_p (stmt))
2991	return false;
2992      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2993	{
2994	  gimple use_stmt = USE_STMT (use_p);
2995	  if (is_gimple_debug (use_stmt))
2996	    continue;
2997	  if (gimple_bb (use_stmt) != bb)
2998	    return false;
2999	}
3000    }
3001  return false;
3002}
3003
3004/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3005   return true and fill in *OPS recursively.  */
3006
3007static bool
3008get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
3009	 struct loop *loop)
3010{
3011  gimple stmt = SSA_NAME_DEF_STMT (var);
3012  tree rhs[2];
3013  int i;
3014
3015  if (!is_reassociable_op (stmt, code, loop))
3016    return false;
3017
3018  rhs[0] = gimple_assign_rhs1 (stmt);
3019  rhs[1] = gimple_assign_rhs2 (stmt);
3020  gimple_set_visited (stmt, true);
3021  for (i = 0; i < 2; i++)
3022    if (TREE_CODE (rhs[i]) == SSA_NAME
3023	&& !get_ops (rhs[i], code, ops, loop)
3024	&& has_single_use (rhs[i]))
3025      {
3026	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
3027
3028	oe->op = rhs[i];
3029	oe->rank = code;
3030	oe->id = 0;
3031	oe->count = 1;
3032	ops->safe_push (oe);
3033      }
3034  return true;
3035}
3036
3037/* Find the ops that were added by get_ops starting from VAR, see if
3038   they were changed during update_range_test and, if so, create new
3039   stmts.  */
3040
3041static tree
3042update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
3043	    unsigned int *pidx, struct loop *loop)
3044{
3045  gimple stmt = SSA_NAME_DEF_STMT (var);
3046  tree rhs[4];
3047  int i;
3048
3049  if (!is_reassociable_op (stmt, code, loop))
3050    return NULL;
3051
3052  rhs[0] = gimple_assign_rhs1 (stmt);
3053  rhs[1] = gimple_assign_rhs2 (stmt);
3054  rhs[2] = rhs[0];
3055  rhs[3] = rhs[1];
3056  for (i = 0; i < 2; i++)
3057    if (TREE_CODE (rhs[i]) == SSA_NAME)
3058      {
3059	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3060	if (rhs[2 + i] == NULL_TREE)
3061	  {
3062	    if (has_single_use (rhs[i]))
3063	      rhs[2 + i] = ops[(*pidx)++]->op;
3064	    else
3065	      rhs[2 + i] = rhs[i];
3066	  }
3067      }
3068  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3069      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3070    {
3071      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3072      var = make_ssa_name (TREE_TYPE (var));
3073      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3074					rhs[2], rhs[3]);
3075      gimple_set_uid (g, gimple_uid (stmt));
3076      gimple_set_visited (g, true);
3077      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3078    }
3079  return var;
3080}
3081
3082/* For each basic block, track the initial value OP passed to get_ops
3083   and the slice [FIRST_IDX, LAST_IDX) it fills in the ops vector.  */
3084
3085struct inter_bb_range_test_entry
3086{
3087  tree op;
3088  unsigned int first_idx, last_idx;
3089};
3090
3091/* Inter-bb range test optimization.  */
3092
3093static void
3094maybe_optimize_range_tests (gimple stmt)
3095{
3096  basic_block first_bb = gimple_bb (stmt);
3097  basic_block last_bb = first_bb;
3098  basic_block other_bb = NULL;
3099  basic_block bb;
3100  edge_iterator ei;
3101  edge e;
3102  auto_vec<operand_entry_t> ops;
3103  auto_vec<inter_bb_range_test_entry> bbinfo;
3104  bool any_changes = false;
3105
3106  /* Consider only basic blocks that end with GIMPLE_COND or
3107     a cast statement satisfying final_range_test_p.  All
3108     but the last bb in the first_bb .. last_bb range
3109     should end with GIMPLE_COND.  */
3110  if (gimple_code (stmt) == GIMPLE_COND)
3111    {
3112      if (EDGE_COUNT (first_bb->succs) != 2)
3113	return;
3114    }
3115  else if (final_range_test_p (stmt))
3116    other_bb = single_succ (first_bb);
3117  else
3118    return;
3119
3120  if (stmt_could_throw_p (stmt))
3121    return;
3122
3123  /* As the relative ordering of post-dominator sons isn't fixed,
3124     maybe_optimize_range_tests can be called first on any
3125     bb in the range we want to optimize.  So, start searching
3126     backwards, to see if first_bb can be set to a predecessor.  */
3127  while (single_pred_p (first_bb))
3128    {
3129      basic_block pred_bb = single_pred (first_bb);
3130      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3131	break;
3132      if (!no_side_effect_bb (first_bb))
3133	break;
3134      first_bb = pred_bb;
3135    }
3136  /* If first_bb is last_bb, other_bb hasn't been computed yet.
3137     Before starting forward search in last_bb successors, find
3138     out the other_bb.  */
3139  if (first_bb == last_bb)
3140    {
3141      other_bb = NULL;
3142      /* As a non-GIMPLE_COND last stmt always terminates the range,
3143	 and the backward search didn't discover anything, just give up.  */
3144      if (gimple_code (stmt) != GIMPLE_COND)
3145	return;
3146      /* Look at both successors.  Either it ends with a GIMPLE_COND
3147	 and satisfies suitable_cond_bb, or ends with a cast and
3148	 other_bb is that cast's successor.  */
3149      FOR_EACH_EDGE (e, ei, first_bb->succs)
3150	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3151	    || e->dest == first_bb)
3152	  return;
3153	else if (single_pred_p (e->dest))
3154	  {
3155	    stmt = last_stmt (e->dest);
3156	    if (stmt
3157		&& gimple_code (stmt) == GIMPLE_COND
3158		&& EDGE_COUNT (e->dest->succs) == 2)
3159	      {
3160		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3161		  break;
3162		else
3163		  other_bb = NULL;
3164	      }
3165	    else if (stmt
3166		     && final_range_test_p (stmt)
3167		     && find_edge (first_bb, single_succ (e->dest)))
3168	      {
3169		other_bb = single_succ (e->dest);
3170		if (other_bb == first_bb)
3171		  other_bb = NULL;
3172	      }
3173	  }
3174      if (other_bb == NULL)
3175	return;
3176    }
3177  /* Now do the forward search, moving last_bb to successor bbs
3178     that aren't other_bb.  */
3179  while (EDGE_COUNT (last_bb->succs) == 2)
3180    {
3181      FOR_EACH_EDGE (e, ei, last_bb->succs)
3182	if (e->dest != other_bb)
3183	  break;
3184      if (e == NULL)
3185	break;
3186      if (!single_pred_p (e->dest))
3187	break;
3188      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3189	break;
3190      if (!no_side_effect_bb (e->dest))
3191	break;
3192      last_bb = e->dest;
3193    }
3194  if (first_bb == last_bb)
3195    return;
3196  /* At this point basic blocks first_bb through last_bb's predecessor
3197     end with GIMPLE_COND, each of them has one edge to
3198     other_bb and another to the next block in the range,
3199     all blocks except first_bb have no side-effects, and
3200     last_bb ends with either a GIMPLE_COND or a cast satisfying
3201     final_range_test_p.  */
3202  for (bb = last_bb; ; bb = single_pred (bb))
3203    {
3204      enum tree_code code;
3205      tree lhs, rhs;
3206      inter_bb_range_test_entry bb_ent;
3207
3208      bb_ent.op = NULL_TREE;
3209      bb_ent.first_idx = ops.length ();
3210      bb_ent.last_idx = bb_ent.first_idx;
3211      e = find_edge (bb, other_bb);
3212      stmt = last_stmt (bb);
3213      gimple_set_visited (stmt, true);
3214      if (gimple_code (stmt) != GIMPLE_COND)
3215	{
3216	  use_operand_p use_p;
3217	  gimple phi;
3218	  edge e2;
3219	  unsigned int d;
3220
3221	  lhs = gimple_assign_lhs (stmt);
3222	  rhs = gimple_assign_rhs1 (stmt);
3223	  gcc_assert (bb == last_bb);
3224
3225	  /* stmt is
3226	     _123 = (int) _234;
3227
3228	     followed by:
3229	     <bb M>:
3230	     # _345 = PHI <_123(N), 1(...), 1(...)>
3231
3232	     or 0 instead of 1.  If it is 0, the _234
3233	     range test is anded together with all the
3234	     other range tests; if it is 1, it is ored with
3235	     them.  */
3236	  single_imm_use (lhs, &use_p, &phi);
3237	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3238	  e2 = find_edge (first_bb, other_bb);
3239	  d = e2->dest_idx;
3240	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3241	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
3242	    code = BIT_AND_EXPR;
3243	  else
3244	    {
3245	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3246	      code = BIT_IOR_EXPR;
3247	    }
3248
3249	  /* If the SSA_NAME_DEF_STMT of _234 is
3250	     _234 = _567 | _789;
3251	     (or &, corresponding to 1/0 in the phi arguments),
3252	     push into ops the individual range test arguments
3253	     of the bitwise or resp. and, recursively.  */
3254	  if (!get_ops (rhs, code, &ops,
3255			loop_containing_stmt (stmt))
3256	      && has_single_use (rhs))
3257	    {
3258	      /* Otherwise, push the _234 range test itself.  */
3259	      operand_entry_t oe
3260		= (operand_entry_t) pool_alloc (operand_entry_pool);
3261
3262	      oe->op = rhs;
3263	      oe->rank = code;
3264	      oe->id = 0;
3265	      oe->count = 1;
3266	      ops.safe_push (oe);
3267	      bb_ent.last_idx++;
3268	    }
3269	  else
3270	    bb_ent.last_idx = ops.length ();
3271	  bb_ent.op = rhs;
3272	  bbinfo.safe_push (bb_ent);
3273	  continue;
3274	}
3275      /* Otherwise stmt is GIMPLE_COND.  */
3276      code = gimple_cond_code (stmt);
3277      lhs = gimple_cond_lhs (stmt);
3278      rhs = gimple_cond_rhs (stmt);
3279      if (TREE_CODE (lhs) == SSA_NAME
3280	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3281	  && ((code != EQ_EXPR && code != NE_EXPR)
3282	      || rhs != boolean_false_node
3283		 /* Either push into ops the individual bitwise
3284		    or resp. and operands, depending on which
3285		    edge is other_bb.  */
3286	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3287				 ^ (code == EQ_EXPR))
3288				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3289			   loop_containing_stmt (stmt))))
3290	{
3291	  /* Or push the GIMPLE_COND stmt itself.  */
3292	  operand_entry_t oe
3293	    = (operand_entry_t) pool_alloc (operand_entry_pool);
3294
3295	  oe->op = NULL;
3296	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
3297		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
3298	  /* oe->op = NULL signals that there is no SSA_NAME
3299	     for the range test, and oe->id instead is the
3300	     number of the basic block at whose end the
3301	     GIMPLE_COND is found.  */
3302	  oe->id = bb->index;
3303	  oe->count = 1;
3304	  ops.safe_push (oe);
3305	  bb_ent.op = NULL;
3306	  bb_ent.last_idx++;
3307	}
3308      else if (ops.length () > bb_ent.first_idx)
3309	{
3310	  bb_ent.op = lhs;
3311	  bb_ent.last_idx = ops.length ();
3312	}
3313      bbinfo.safe_push (bb_ent);
3314      if (bb == first_bb)
3315	break;
3316    }
3317  if (ops.length () > 1)
3318    any_changes = optimize_range_tests (ERROR_MARK, &ops);
3319  if (any_changes)
3320    {
3321      unsigned int idx, max_idx = 0;
3322      /* update_ops relies on has_single_use predicates returning the
3323	 same values as they did during get_ops earlier.  Additionally it
3324	 never removes statements, only adds new ones, and it should walk
3325	 from the single imm use and check the predicate already before
3326	 making those changes.
3327	 On the other hand, the handling of GIMPLE_COND directly can turn
3328	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3329	 it needs to be done in a separate loop afterwards.  */
3330      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3331	{
3332	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3333	      && bbinfo[idx].op != NULL_TREE)
3334	    {
3335	      tree new_op;
3336
3337	      max_idx = idx;
3338	      stmt = last_stmt (bb);
3339	      new_op = update_ops (bbinfo[idx].op,
3340				   (enum tree_code)
3341				   ops[bbinfo[idx].first_idx]->rank,
3342				   ops, &bbinfo[idx].first_idx,
3343				   loop_containing_stmt (stmt));
3344	      if (new_op == NULL_TREE)
3345		{
3346		  gcc_assert (bb == last_bb);
3347		  new_op = ops[bbinfo[idx].first_idx++]->op;
3348		}
3349	      if (bbinfo[idx].op != new_op)
3350		{
3351		  imm_use_iterator iter;
3352		  use_operand_p use_p;
3353		  gimple use_stmt, cast_stmt = NULL;
3354
3355		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3356		    if (is_gimple_debug (use_stmt))
3357		      continue;
3358		    else if (gimple_code (use_stmt) == GIMPLE_COND
3359			     || gimple_code (use_stmt) == GIMPLE_PHI)
3360		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3361			SET_USE (use_p, new_op);
3362		    else if (gimple_assign_cast_p (use_stmt))
3363		      cast_stmt = use_stmt;
3364		    else
3365		      gcc_unreachable ();
3366		  if (cast_stmt)
3367		    {
3368		      gcc_assert (bb == last_bb);
3369		      tree lhs = gimple_assign_lhs (cast_stmt);
3370		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3371		      enum tree_code rhs_code
3372			= gimple_assign_rhs_code (cast_stmt);
3373		      gassign *g;
3374		      if (is_gimple_min_invariant (new_op))
3375			{
3376			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
3377			  g = gimple_build_assign (new_lhs, new_op);
3378			}
3379		      else
3380			g = gimple_build_assign (new_lhs, rhs_code, new_op);
3381		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3382		      gimple_set_uid (g, gimple_uid (cast_stmt));
3383		      gimple_set_visited (g, true);
3384		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3385		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3386			if (is_gimple_debug (use_stmt))
3387			  continue;
3388			else if (gimple_code (use_stmt) == GIMPLE_COND
3389				 || gimple_code (use_stmt) == GIMPLE_PHI)
3390			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3391			    SET_USE (use_p, new_lhs);
3392			else
3393			  gcc_unreachable ();
3394		    }
3395		}
3396	    }
3397	  if (bb == first_bb)
3398	    break;
3399	}
3400      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3401	{
3402	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3403	      && bbinfo[idx].op == NULL_TREE
3404	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3405	    {
3406	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3407
3408	      if (idx > max_idx)
3409		max_idx = idx;
3410
3411	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3412		gimple_cond_make_false (cond_stmt);
3413	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3414		gimple_cond_make_true (cond_stmt);
3415	      else
3416		{
3417		  gimple_cond_set_code (cond_stmt, NE_EXPR);
3418		  gimple_cond_set_lhs (cond_stmt,
3419				       ops[bbinfo[idx].first_idx]->op);
3420		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3421		}
3422	      update_stmt (cond_stmt);
3423	    }
3424	  if (bb == first_bb)
3425	    break;
3426	}
3427
3428      /* The above changes could result in basic blocks after the first
3429	 modified one, up to and including last_bb, being executed even if
3430	 they would not be in the original program.  If the value ranges of
3431	 assignment lhs' in those bbs were dependent on the conditions
3432	 guarding those basic blocks which now can change, the VRs might
3433	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
3434	 are only used within the same bb, it should not be a big deal if
3435	 we just reset all the VRs in those bbs.  See PR68671.  */
3436      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3437	reset_flow_sensitive_info_in_bb (bb);
3438    }
3439}
3440
3441/* Return true if OPERAND is defined by a PHI node which uses the LHS
3442   of STMT in its operands.  This is also known as a "destructive
3443   update" operation.  */
3444
3445static bool
3446is_phi_for_stmt (gimple stmt, tree operand)
3447{
3448  gimple def_stmt;
3449  gphi *def_phi;
3450  tree lhs;
3451  use_operand_p arg_p;
3452  ssa_op_iter i;
3453
3454  if (TREE_CODE (operand) != SSA_NAME)
3455    return false;
3456
3457  lhs = gimple_assign_lhs (stmt);
3458
3459  def_stmt = SSA_NAME_DEF_STMT (operand);
3460  def_phi = dyn_cast <gphi *> (def_stmt);
3461  if (!def_phi)
3462    return false;
3463
3464  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3465    if (lhs == USE_FROM_PTR (arg_p))
3466      return true;
3467  return false;
3468}
3469
3470/* Remove def stmt of VAR if VAR has zero uses and recurse
3471   on rhs1 operand if so.  */
3472
3473static void
3474remove_visited_stmt_chain (tree var)
3475{
3476  gimple stmt;
3477  gimple_stmt_iterator gsi;
3478
3479  while (1)
3480    {
3481      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3482	return;
3483      stmt = SSA_NAME_DEF_STMT (var);
3484      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3485	{
3486	  var = gimple_assign_rhs1 (stmt);
3487	  gsi = gsi_for_stmt (stmt);
3488	  reassoc_remove_stmt (&gsi);
3489	  release_defs (stmt);
3490	}
3491      else
3492	return;
3493    }
3494}
3495
3496/* This function checks three consecutive operands in
3497   the passed operands vector OPS starting from OPINDEX and
3498   swaps two operands if it is profitable for the binary operation
3499   consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3500
3501   We pair ops with the same rank if possible.
3502
3503   The alternative we try is to see if STMT is a destructive
3504   update style statement, which is like:
3505   b = phi (a, ...)
3506   a = c + b;
3507   In that case, we want to use the destructive update form to
3508   expose the possible vectorizer sum reduction opportunity.
3509   In that case, the third operand will be the phi node. This
3510   check is not performed if STMT is null.
3511
3512   We could, of course, try to be better as noted above, and do a
3513   lot of work to try to find these opportunities in >3 operand
3514   cases, but it is unlikely to be worth it.  */
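/* E.g. (purely illustrative): if the three operands have ranks {3, 5, 3},
   the first and third share a rank, so the first two entries are swapped
   below and the equal-rank pair ends up feeding the same binary
   operation.  */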
3515
3516static void
3517swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3518			  unsigned int opindex, gimple stmt)
3519{
3520  operand_entry_t oe1, oe2, oe3;
3521
3522  oe1 = ops[opindex];
3523  oe2 = ops[opindex + 1];
3524  oe3 = ops[opindex + 2];
3525
3526  if ((oe1->rank == oe2->rank
3527       && oe2->rank != oe3->rank)
3528      || (stmt && is_phi_for_stmt (stmt, oe3->op)
3529	  && !is_phi_for_stmt (stmt, oe1->op)
3530	  && !is_phi_for_stmt (stmt, oe2->op)))
3531    {
3532      struct operand_entry temp = *oe3;
3533      oe3->op = oe1->op;
3534      oe3->rank = oe1->rank;
3535      oe1->op = temp.op;
3536      oe1->rank = temp.rank;
3537    }
3538  else if ((oe1->rank == oe3->rank
3539	    && oe2->rank != oe3->rank)
3540	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
3541	       && !is_phi_for_stmt (stmt, oe1->op)
3542	       && !is_phi_for_stmt (stmt, oe3->op)))
3543    {
3544      struct operand_entry temp = *oe2;
3545      oe2->op = oe1->op;
3546      oe2->rank = oe1->rank;
3547      oe1->op = temp.op;
3548      oe1->rank = temp.rank;
3549    }
3550}
3551
3552/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3553   two definitions, otherwise return STMT.  */
3554
3555static inline gimple
3556find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3557{
3558  if (TREE_CODE (rhs1) == SSA_NAME
3559      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3560    stmt = SSA_NAME_DEF_STMT (rhs1);
3561  if (TREE_CODE (rhs2) == SSA_NAME
3562      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3563    stmt = SSA_NAME_DEF_STMT (rhs2);
3564  return stmt;
3565}
3566
3567/* Recursively rewrite our linearized statements so that the operators
3568   match those in OPS[OPINDEX], putting the computation in rank
3569   order.  Return new lhs.  */
3570
3571static tree
3572rewrite_expr_tree (gimple stmt, unsigned int opindex,
3573		   vec<operand_entry_t> ops, bool changed)
3574{
3575  tree rhs1 = gimple_assign_rhs1 (stmt);
3576  tree rhs2 = gimple_assign_rhs2 (stmt);
3577  tree lhs = gimple_assign_lhs (stmt);
3578  operand_entry_t oe;
3579
3580  /* The final recursion case for this function is that you have
3581     exactly two operations left.
3582     If we had exactly one op in the entire list to start with, we
3583     would have never called this function, and the tail recursion
3584     rewrites them one at a time.  */
3585  if (opindex + 2 == ops.length ())
3586    {
3587      operand_entry_t oe1, oe2;
3588
3589      oe1 = ops[opindex];
3590      oe2 = ops[opindex + 1];
3591
3592      if (rhs1 != oe1->op || rhs2 != oe2->op)
3593	{
3594	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3595	  unsigned int uid = gimple_uid (stmt);
3596
3597	  if (dump_file && (dump_flags & TDF_DETAILS))
3598	    {
3599	      fprintf (dump_file, "Transforming ");
3600	      print_gimple_stmt (dump_file, stmt, 0, 0);
3601	    }
3602
3603	  /* Even when changed is false, reassociation could have e.g. removed
3604	     some redundant operations, so unless we are just swapping the
3605	     arguments or unless there is no change at all (then we just
3606	     return lhs), force creation of a new SSA_NAME.  */
3607	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3608	    {
3609	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3610	      lhs = make_ssa_name (TREE_TYPE (lhs));
3611	      stmt
3612		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3613				       oe1->op, oe2->op);
3614	      gimple_set_uid (stmt, uid);
3615	      gimple_set_visited (stmt, true);
3616	      if (insert_point == gsi_stmt (gsi))
3617		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3618	      else
3619		insert_stmt_after (stmt, insert_point);
3620	    }
3621	  else
3622	    {
3623	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3624				   == stmt);
3625	      gimple_assign_set_rhs1 (stmt, oe1->op);
3626	      gimple_assign_set_rhs2 (stmt, oe2->op);
3627	      update_stmt (stmt);
3628	    }
3629
3630	  if (rhs1 != oe1->op && rhs1 != oe2->op)
3631	    remove_visited_stmt_chain (rhs1);
3632
3633	  if (dump_file && (dump_flags & TDF_DETAILS))
3634	    {
3635	      fprintf (dump_file, " into ");
3636	      print_gimple_stmt (dump_file, stmt, 0, 0);
3637	    }
3638	}
3639      return lhs;
3640    }
3641
3642  /* If we hit here, we should have 3 or more ops left.  */
3643  gcc_assert (opindex + 2 < ops.length ());
3644
3645  /* Rewrite the next operator.  */
3646  oe = ops[opindex];
3647
3648  /* Recurse on the LHS of the binary operator, which is guaranteed to
3649     be the non-leaf side.  */
3650  tree new_rhs1
3651    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3652			 changed || oe->op != rhs2);
3653
3654  if (oe->op != rhs2 || new_rhs1 != rhs1)
3655    {
3656      if (dump_file && (dump_flags & TDF_DETAILS))
3657	{
3658	  fprintf (dump_file, "Transforming ");
3659	  print_gimple_stmt (dump_file, stmt, 0, 0);
3660	}
3661
3662      /* If changed is false, this is either opindex == 0
3663	 or all outer rhs2's were equal to corresponding oe->op,
3664	 and powi_result is NULL.
3665	 That means lhs is equivalent before and after reassociation.
3666	 Otherwise ensure the old lhs SSA_NAME is not reused and
3667	 create a new stmt as well, so that any debug stmts will be
3668	 properly adjusted.  */
3669      if (changed)
3670	{
3671	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3672	  unsigned int uid = gimple_uid (stmt);
3673	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3674
3675	  lhs = make_ssa_name (TREE_TYPE (lhs));
3676	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3677				      new_rhs1, oe->op);
3678	  gimple_set_uid (stmt, uid);
3679	  gimple_set_visited (stmt, true);
3680	  if (insert_point == gsi_stmt (gsi))
3681	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3682	  else
3683	    insert_stmt_after (stmt, insert_point);
3684	}
3685      else
3686	{
3687	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3688			       == stmt);
3689	  gimple_assign_set_rhs1 (stmt, new_rhs1);
3690	  gimple_assign_set_rhs2 (stmt, oe->op);
3691	  update_stmt (stmt);
3692	}
3693
3694      if (dump_file && (dump_flags & TDF_DETAILS))
3695	{
3696	  fprintf (dump_file, " into ");
3697	  print_gimple_stmt (dump_file, stmt, 0, 0);
3698	}
3699    }
3700  return lhs;
3701}
3702
3703/* Find out how many cycles we need to compute a chain of statements.
3704   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3705   maximum number of independent statements we may execute per cycle.  */
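/* For example (illustrative only): with OPS_NUM = 16 and CPU_WIDTH = 4,
   the code below computes 16 / (2 * 4) = 2 reduction cycles, leaving
   16 - 2 * 4 = 8 operands; a pairwise reduction then finishes in
   exact_log2 (8) = 3 further cycles, for a total of 5.  */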
3706
3707static int
3708get_required_cycles (int ops_num, int cpu_width)
3709{
3710  int res;
3711  int elog;
3712  unsigned int rest;
3713
3714  /* While we have more than 2 * cpu_width operands
3715     we may reduce the number of operands by cpu_width
3716     per cycle.  */
3717  res = ops_num / (2 * cpu_width);
3718
3719  /* The remaining operand count may be halved each cycle
3720     until we have only one operand.  */
3721  rest = (unsigned)(ops_num - res * cpu_width);
3722  elog = exact_log2 (rest);
3723  if (elog >= 0)
3724    res += elog;
3725  else
3726    res += floor_log2 (rest) + 1;
3727
3728  return res;
3729}
3730
3731/* Returns an optimal number of registers to use for computation of
3732   the given statements.  */
3733
3734static int
3735get_reassociation_width (int ops_num, enum tree_code opc,
3736			 machine_mode mode)
3737{
3738  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3739  int width;
3740  int width_min;
3741  int cycles_best;
3742
3743  if (param_width > 0)
3744    width = param_width;
3745  else
3746    width = targetm.sched.reassociation_width (opc, mode);
3747
3748  if (width == 1)
3749    return width;
3750
3751  /* Get the minimal time required for sequence computation.  */
3752  cycles_best = get_required_cycles (ops_num, width);
3753
3754  /* Check if we may use less width and still compute sequence for
3755     the same time.  It will allow us to reduce registers usage.
3756     get_required_cycles is monotonically increasing with lower width
3757     so we can perform a binary search for the minimal width that still
3758     results in the optimal cycle count.  */
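  /* E.g. (illustrative): for ops_num == 4 and a target width of 4,
     get_required_cycles returns 2 for both width 4 and width 2, so the
     search below settles on width 2 and saves registers.  */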
3759  width_min = 1;
3760  while (width > width_min)
3761    {
3762      int width_mid = (width + width_min) / 2;
3763
3764      if (get_required_cycles (ops_num, width_mid) == cycles_best)
3765	width = width_mid;
3766      else if (width_min < width_mid)
3767	width_min = width_mid;
3768      else
3769	break;
3770    }
3771
3772  return width;
3773}
3774
3775/* Recursively rewrite our linearized statements so that the operators
3776   match those in OPS[OPINDEX], putting the computation in rank
3777   order and trying to allow operations to be executed in
3778   parallel.  */
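/* As a purely illustrative sketch: for OPS = {a, b, c, d} already sorted
   by rank and WIDTH = 2, the loop below emits two independent statements

     t1 = c + d;
     t2 = a + b;

   and then reuses the original statement as t1 + t2, instead of a serial
   chain such as ((a + b) + c) + d.  */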
3779
3780static void
3781rewrite_expr_tree_parallel (gassign *stmt, int width,
3782			    vec<operand_entry_t> ops)
3783{
3784  enum tree_code opcode = gimple_assign_rhs_code (stmt);
3785  int op_num = ops.length ();
3786  int stmt_num = op_num - 1;
3787  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3788  int op_index = op_num - 1;
3789  int stmt_index = 0;
3790  int ready_stmts_end = 0;
3791  int i = 0;
3792  tree last_rhs1 = gimple_assign_rhs1 (stmt);
3793
3794  /* We start expression rewriting from the top statements.
3795     So, in this loop we create a full list of statements
3796     we will work with.  */
3797  stmts[stmt_num - 1] = stmt;
3798  for (i = stmt_num - 2; i >= 0; i--)
3799    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3800
3801  for (i = 0; i < stmt_num; i++)
3802    {
3803      tree op1, op2;
3804
3805      /* Determine whether we should use results of
3806	 already handled statements or not.  */
3807      if (ready_stmts_end == 0
3808	  && (i - stmt_index >= width || op_index < 1))
3809	ready_stmts_end = i;
3810
3811      /* Now we choose operands for the next statement.  A nonzero
3812	 value in ready_stmts_end means that we should use the
3813	 results of already generated statements as new operands.  */
3814      if (ready_stmts_end > 0)
3815	{
3816	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
3817	  if (ready_stmts_end > stmt_index)
3818	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
3819	  else if (op_index >= 0)
3820	    op2 = ops[op_index--]->op;
3821	  else
3822	    {
3823	      gcc_assert (stmt_index < i);
3824	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
3825	    }
3826
3827	  if (stmt_index >= ready_stmts_end)
3828	    ready_stmts_end = 0;
3829	}
3830      else
3831	{
3832	  if (op_index > 1)
3833	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3834	  op2 = ops[op_index--]->op;
3835	  op1 = ops[op_index--]->op;
3836	}
3837
3838      /* If we are emitting the last statement then we should put
3839	 the operands into it.  This will also
3840	 terminate the loop.  */
3841      if (op_index < 0 && stmt_index == i)
3842	i = stmt_num - 1;
3843
3844      if (dump_file && (dump_flags & TDF_DETAILS))
3845	{
3846	  fprintf (dump_file, "Transforming ");
3847	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3848	}
3849
3850      /* We keep the original statement only for the last one.  All
3851	 others are recreated.  */
3852      if (i == stmt_num - 1)
3853	{
3854	  gimple_assign_set_rhs1 (stmts[i], op1);
3855	  gimple_assign_set_rhs2 (stmts[i], op2);
3856	  update_stmt (stmts[i]);
3857	}
3858      else
3859	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3860
3861      if (dump_file && (dump_flags & TDF_DETAILS))
3862	{
3863	  fprintf (dump_file, " into ");
3864	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3865	}
3866    }
3867
3868  remove_visited_stmt_chain (last_rhs1);
3869}
3870
3871/* Transform STMT, which is really (A + B) + (C + D), into the left
3872   linear form, ((A+B)+C)+D.
3873   Recurse on D if necessary.  */
3874
3875static void
3876linearize_expr (gimple stmt)
3877{
3878  gimple_stmt_iterator gsi;
3879  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3880  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3881  gimple oldbinrhs = binrhs;
3882  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3883  gimple newbinrhs = NULL;
3884  struct loop *loop = loop_containing_stmt (stmt);
3885  tree lhs = gimple_assign_lhs (stmt);
3886
3887  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3888	      && is_reassociable_op (binrhs, rhscode, loop));
3889
3890  gsi = gsi_for_stmt (stmt);
3891
3892  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3893  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3894				gimple_assign_rhs_code (binrhs),
3895				gimple_assign_lhs (binlhs),
3896				gimple_assign_rhs2 (binrhs));
3897  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3898  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3899  gimple_set_uid (binrhs, gimple_uid (stmt));
3900
3901  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3902    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3903
3904  if (dump_file && (dump_flags & TDF_DETAILS))
3905    {
3906      fprintf (dump_file, "Linearized: ");
3907      print_gimple_stmt (dump_file, stmt, 0, 0);
3908    }
3909
3910  reassociate_stats.linearized++;
3911  update_stmt (stmt);
3912
3913  gsi = gsi_for_stmt (oldbinrhs);
3914  reassoc_remove_stmt (&gsi);
3915  release_defs (oldbinrhs);
3916
3917  gimple_set_visited (stmt, true);
3918  gimple_set_visited (binlhs, true);
3919  gimple_set_visited (binrhs, true);
3920
3921  /* Tail recurse on the new rhs if it still needs reassociation.  */
3922  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3923    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3924	   want to change the algorithm while converting to tuples.  */
3925    linearize_expr (stmt);
3926}
3927
3928/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3929   it.  Otherwise, return NULL.  */
3930
3931static gimple
3932get_single_immediate_use (tree lhs)
3933{
3934  use_operand_p immuse;
3935  gimple immusestmt;
3936
3937  if (TREE_CODE (lhs) == SSA_NAME
3938      && single_imm_use (lhs, &immuse, &immusestmt)
3939      && is_gimple_assign (immusestmt))
3940    return immusestmt;
3941
3942  return NULL;
3943}
3944
3945/* Recursively negate the value of TONEGATE, and return the SSA_NAME
3946   representing the negated value.  Insertions of any necessary
3947   instructions go before GSI.
3948   This function is recursive in that, if you hand it "a_5" as the
3949   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3950   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3951
3952static tree
3953negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3954{
3955  gimple negatedefstmt = NULL;
3956  tree resultofnegate;
3957  gimple_stmt_iterator gsi;
3958  unsigned int uid;
3959
3960  /* If we are trying to negate a name, defined by an add, negate the
3961     add operands instead.  */
3962  if (TREE_CODE (tonegate) == SSA_NAME)
3963    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3964  if (TREE_CODE (tonegate) == SSA_NAME
3965      && is_gimple_assign (negatedefstmt)
3966      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3967      && has_single_use (gimple_assign_lhs (negatedefstmt))
3968      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3969    {
3970      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3971      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3972      tree lhs = gimple_assign_lhs (negatedefstmt);
3973      gimple g;
3974
3975      gsi = gsi_for_stmt (negatedefstmt);
3976      rhs1 = negate_value (rhs1, &gsi);
3977
3978      gsi = gsi_for_stmt (negatedefstmt);
3979      rhs2 = negate_value (rhs2, &gsi);
3980
3981      gsi = gsi_for_stmt (negatedefstmt);
3982      lhs = make_ssa_name (TREE_TYPE (lhs));
3983      gimple_set_visited (negatedefstmt, true);
3984      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3985      gimple_set_uid (g, gimple_uid (negatedefstmt));
3986      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3987      return lhs;
3988    }
3989
3990  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3991  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3992					     NULL_TREE, true, GSI_SAME_STMT);
3993  gsi = *gsip;
3994  uid = gimple_uid (gsi_stmt (gsi));
3995  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3996    {
3997      gimple stmt = gsi_stmt (gsi);
3998      if (gimple_uid (stmt) != 0)
3999	break;
4000      gimple_set_uid (stmt, uid);
4001    }
4002  return resultofnegate;
4003}
4004
4005/* Return true if we should break up the subtract in STMT into an add
4006   with negate.  This is true when the subtract operands are really
4007   adds, or the subtract itself is used in an add expression.  In
4008   either case, breaking up the subtract into an add with negate
4009   exposes the adds to reassociation.  */
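/* For instance (illustrative), in

     w_1 = x_2 + y_3;
     z_4 = w_1 - v_5;

   the subtract is broken up because its first operand is defined by a
   reassociable add; a subtract whose single use is a PLUS_EXPR or
   MULT_EXPR qualifies for the same reason.  */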
4010
4011static bool
4012should_break_up_subtract (gimple stmt)
4013{
4014  tree lhs = gimple_assign_lhs (stmt);
4015  tree binlhs = gimple_assign_rhs1 (stmt);
4016  tree binrhs = gimple_assign_rhs2 (stmt);
4017  gimple immusestmt;
4018  struct loop *loop = loop_containing_stmt (stmt);
4019
4020  if (TREE_CODE (binlhs) == SSA_NAME
4021      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4022    return true;
4023
4024  if (TREE_CODE (binrhs) == SSA_NAME
4025      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4026    return true;
4027
4028  if (TREE_CODE (lhs) == SSA_NAME
4029      && (immusestmt = get_single_immediate_use (lhs))
4030      && is_gimple_assign (immusestmt)
4031      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4032	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4033    return true;
4034  return false;
4035}
4036
4037/* Transform STMT from A - B into A + -B.  */
4038
4039static void
4040break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
4041{
4042  tree rhs1 = gimple_assign_rhs1 (stmt);
4043  tree rhs2 = gimple_assign_rhs2 (stmt);
4044
4045  if (dump_file && (dump_flags & TDF_DETAILS))
4046    {
4047      fprintf (dump_file, "Breaking up subtract ");
4048      print_gimple_stmt (dump_file, stmt, 0, 0);
4049    }
4050
4051  rhs2 = negate_value (rhs2, gsip);
4052  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4053  update_stmt (stmt);
4054}
4055
4056/* Determine whether STMT is a builtin call that raises an SSA name
4057   to an integer power and has only one use.  If so, and this is early
4058   reassociation and unsafe math optimizations are permitted, place
4059   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4060   If any of these conditions does not hold, return FALSE.  */
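/* E.g. (illustrative, assuming -funsafe-math-optimizations and, for the
   pow case, -fno-math-errno): a single-use call __builtin_pow (x_1, 4.0)
   yields *BASE = x_1 and *EXPONENT = 4, while pow (x_1, 0.5) is rejected
   because 0.5 is not an exact integer.  */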
4061
4062static bool
4063acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
4064{
4065  tree fndecl, arg1;
4066  REAL_VALUE_TYPE c, cint;
4067
4068  if (!first_pass_instance
4069      || !flag_unsafe_math_optimizations
4070      || !is_gimple_call (stmt)
4071      || !has_single_use (gimple_call_lhs (stmt)))
4072    return false;
4073
4074  fndecl = gimple_call_fndecl (stmt);
4075
4076  if (!fndecl
4077      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
4078    return false;
4079
4080  switch (DECL_FUNCTION_CODE (fndecl))
4081    {
4082    CASE_FLT_FN (BUILT_IN_POW):
4083      if (flag_errno_math)
4084	return false;
4085
4086      *base = gimple_call_arg (stmt, 0);
4087      arg1 = gimple_call_arg (stmt, 1);
4088
4089      if (TREE_CODE (arg1) != REAL_CST)
4090	return false;
4091
4092      c = TREE_REAL_CST (arg1);
4093
4094      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4095	return false;
4096
4097      *exponent = real_to_integer (&c);
4098      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4099      if (!real_identical (&c, &cint))
4100	return false;
4101
4102      break;
4103
4104    CASE_FLT_FN (BUILT_IN_POWI):
4105      *base = gimple_call_arg (stmt, 0);
4106      arg1 = gimple_call_arg (stmt, 1);
4107
4108      if (!tree_fits_shwi_p (arg1))
4109	return false;
4110
4111      *exponent = tree_to_shwi (arg1);
4112      break;
4113
4114    default:
4115      return false;
4116    }
4117
4118  /* Expanding negative exponents is generally unproductive, so we don't
4119     complicate matters with those.  Exponents of zero and one should
4120     have been handled by expression folding.  */
4121  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4122    return false;
4123
4124  return true;
4125}
4126
4127/* Recursively linearize a binary expression that is the RHS of STMT.
4128   Place the operands of the expression tree in the vector named OPS.  */
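/* E.g. (illustrative): for the linearized tree rooted at STMT in

     t_1 = a_2 + b_3;
     t_4 = t_1 + c_5;
     t_6 = t_4 + d_7;

   the vector OPS ends up holding the leaf operands a_2, b_3, c_5 and
   d_7.  */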
4129
4130static void
4131linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4132		     bool is_associative, bool set_visited)
4133{
4134  tree binlhs = gimple_assign_rhs1 (stmt);
4135  tree binrhs = gimple_assign_rhs2 (stmt);
4136  gimple binlhsdef = NULL, binrhsdef = NULL;
4137  bool binlhsisreassoc = false;
4138  bool binrhsisreassoc = false;
4139  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4140  struct loop *loop = loop_containing_stmt (stmt);
4141  tree base = NULL_TREE;
4142  HOST_WIDE_INT exponent = 0;
4143
4144  if (set_visited)
4145    gimple_set_visited (stmt, true);
4146
4147  if (TREE_CODE (binlhs) == SSA_NAME)
4148    {
4149      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4150      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4151			 && !stmt_could_throw_p (binlhsdef));
4152    }
4153
4154  if (TREE_CODE (binrhs) == SSA_NAME)
4155    {
4156      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4157      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4158			 && !stmt_could_throw_p (binrhsdef));
4159    }
4160
4161  /* If the LHS is not reassociable, but the RHS is, we need to swap
4162     them.  If neither is reassociable, there is nothing we can do, so
4163     just put them in the ops vector.  If the LHS is reassociable,
4164     linearize it.  If both are reassociable, then linearize the RHS
4165     and the LHS.  */
4166
4167  if (!binlhsisreassoc)
4168    {
4169      tree temp;
4170
4171      /* If this is not an associative operation, like division, give up.  */
4172      if (!is_associative)
4173	{
4174	  add_to_ops_vec (ops, binrhs);
4175	  return;
4176	}
4177
4178      if (!binrhsisreassoc)
4179	{
4180	  if (rhscode == MULT_EXPR
4181	      && TREE_CODE (binrhs) == SSA_NAME
4182	      && acceptable_pow_call (binrhsdef, &base, &exponent))
4183	    {
4184	      add_repeat_to_ops_vec (ops, base, exponent);
4185	      gimple_set_visited (binrhsdef, true);
4186	    }
4187	  else
4188	    add_to_ops_vec (ops, binrhs);
4189
4190	  if (rhscode == MULT_EXPR
4191	      && TREE_CODE (binlhs) == SSA_NAME
4192	      && acceptable_pow_call (binlhsdef, &base, &exponent))
4193	    {
4194	      add_repeat_to_ops_vec (ops, base, exponent);
4195	      gimple_set_visited (binlhsdef, true);
4196	    }
4197	  else
4198	    add_to_ops_vec (ops, binlhs);
4199
4200	  return;
4201	}
4202
4203      if (dump_file && (dump_flags & TDF_DETAILS))
4204	{
4205	  fprintf (dump_file, "swapping operands of ");
4206	  print_gimple_stmt (dump_file, stmt, 0, 0);
4207	}
4208
4209      swap_ssa_operands (stmt,
4210			 gimple_assign_rhs1_ptr (stmt),
4211			 gimple_assign_rhs2_ptr (stmt));
4212      update_stmt (stmt);
4213
4214      if (dump_file && (dump_flags & TDF_DETAILS))
4215	{
4216	  fprintf (dump_file, " is now ");
4217	  print_gimple_stmt (dump_file, stmt, 0, 0);
4218	}
4219
4220      /* We want to make it so the lhs is always the reassociative op,
4221	 so swap.  */
4222      temp = binlhs;
4223      binlhs = binrhs;
4224      binrhs = temp;
4225    }
4226  else if (binrhsisreassoc)
4227    {
4228      linearize_expr (stmt);
4229      binlhs = gimple_assign_rhs1 (stmt);
4230      binrhs = gimple_assign_rhs2 (stmt);
4231    }
4232
4233  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4234	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4235				      rhscode, loop));
4236  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4237		       is_associative, set_visited);
4238
4239  if (rhscode == MULT_EXPR
4240      && TREE_CODE (binrhs) == SSA_NAME
4241      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4242    {
4243      add_repeat_to_ops_vec (ops, base, exponent);
4244      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4245    }
4246  else
4247    add_to_ops_vec (ops, binrhs);
4248}
4249
4250/* Repropagate the negates back into subtracts, since no other pass
4251   currently does it.  */
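/* E.g. (illustrative): given

     x_1 = -a_2;
     y_3 = b_4 + x_1;

   the use is rewritten below into y_3 = b_4 - a_2, and the now unused
   negate is typically cleaned up afterwards.  */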
4252
4253static void
4254repropagate_negates (void)
4255{
4256  unsigned int i = 0;
4257  tree negate;
4258
4259  FOR_EACH_VEC_ELT (plus_negates, i, negate)
4260    {
4261      gimple user = get_single_immediate_use (negate);
4262
4263      if (!user || !is_gimple_assign (user))
4264	continue;
4265
4266      /* The negate operand can be either operand of a PLUS_EXPR
4267	 (it can be the LHS if the RHS is a constant for example).
4268
4269	 Force the negate operand to the RHS of the PLUS_EXPR, then
4270	 transform the PLUS_EXPR into a MINUS_EXPR.  */
4271      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4272	{
4273	  /* If the negated operand appears on the LHS of the
4274	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
4275	     to force the negated operand to the RHS of the PLUS_EXPR.  */
4276	  if (gimple_assign_rhs1 (user) == negate)
4277	    {
4278	      swap_ssa_operands (user,
4279				 gimple_assign_rhs1_ptr (user),
4280				 gimple_assign_rhs2_ptr (user));
4281	    }
4282
4283	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4284	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
4285	  if (gimple_assign_rhs2 (user) == negate)
4286	    {
4287	      tree rhs1 = gimple_assign_rhs1 (user);
4288	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4289	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
4290	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4291	      update_stmt (user);
4292	    }
4293	}
4294      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4295	{
4296	  if (gimple_assign_rhs1 (user) == negate)
4297	    {
4298	      /* We have
4299	           x = -a
4300		   y = x - b
4301		 which we transform into
4302		   x = a + b
4303		   y = -x .
4304		 This pushes down the negate which we possibly can merge
4305		 into some other operation, hence insert it into the
4306		 plus_negates vector.  */
4307	      gimple feed = SSA_NAME_DEF_STMT (negate);
4308	      tree a = gimple_assign_rhs1 (feed);
4309	      tree b = gimple_assign_rhs2 (user);
4310	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4311	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4312	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4313	      gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4314	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4315	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4316	      user = gsi_stmt (gsi2);
4317	      update_stmt (user);
4318	      reassoc_remove_stmt (&gsi);
4319	      release_defs (feed);
4320	      plus_negates.safe_push (gimple_assign_lhs (user));
4321	    }
4322	  else
4323	    {
4324	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
4325	         rid of one operation.  */
4326	      gimple feed = SSA_NAME_DEF_STMT (negate);
4327	      tree a = gimple_assign_rhs1 (feed);
4328	      tree rhs1 = gimple_assign_rhs1 (user);
4329	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
4330	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4331	      update_stmt (gsi_stmt (gsi));
4332	    }
4333	}
4334    }
4335}
4336
4337/* Returns true if OP is of a type for which we can do reassociation.
4338   That is for wrapping integral or non-saturating fixed-point types, and
4339   for floating point types when associative-math is enabled.  */
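/* E.g. (illustrative): an unsigned int operand qualifies because its
   overflow wraps, a double operand qualifies only under
   -fassociative-math, and a signed int operand with undefined overflow
   does not.  */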
4340
4341static bool
4342can_reassociate_p (tree op)
4343{
4344  tree type = TREE_TYPE (op);
4345  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4346      || NON_SAT_FIXED_POINT_TYPE_P (type)
4347      || (flag_associative_math && FLOAT_TYPE_P (type)))
4348    return true;
4349  return false;
4350}
4351
4352/* Break up subtract operations in block BB.
4353
4354   We do this top down because we don't know whether the subtract is
4355   part of a possible chain of reassociation except at the top.
4356
4357   IE given
4358   d = f + g
4359   c = a + e
4360   b = c - d
4361   q = b - r
4362   k = t - q
4363
4364   we want to break up k = t - q, but we won't until we've transformed q
4365   = b - r, which won't be broken up until we transform b = c - d.
4366
4367   En passant, clear the GIMPLE visited flag on every statement
4368   and set UIDs within each basic block.  */
4369
4370static void
4371break_up_subtract_bb (basic_block bb)
4372{
4373  gimple_stmt_iterator gsi;
4374  basic_block son;
4375  unsigned int uid = 1;
4376
4377  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4378    {
4379      gimple stmt = gsi_stmt (gsi);
4380      gimple_set_visited (stmt, false);
4381      gimple_set_uid (stmt, uid++);
4382
4383      if (!is_gimple_assign (stmt)
4384	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
4385	continue;
4386
4387      /* Look for simple gimple subtract operations.  */
4388      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4389	{
4390	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4391	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4392	    continue;
4393
4394	  /* Check for a subtract used only in an addition.  If this
4395	     is the case, transform it into add of a negate for better
4396	     reassociation.  IE transform C = A-B into C = A + -B if C
4397	     is only used in an addition.  */
4398	  if (should_break_up_subtract (stmt))
4399	    break_up_subtract (stmt, &gsi);
4400	}
4401      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4402	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4403	plus_negates.safe_push (gimple_assign_lhs (stmt));
4404    }
4405  for (son = first_dom_son (CDI_DOMINATORS, bb);
4406       son;
4407       son = next_dom_son (CDI_DOMINATORS, son))
4408    break_up_subtract_bb (son);
4409}
4410
4411/* Used for repeated factor analysis.  */
4412struct repeat_factor_d
4413{
4414  /* An SSA name that occurs in a multiply chain.  */
4415  tree factor;
4416
4417  /* Cached rank of the factor.  */
4418  unsigned rank;
4419
4420  /* Number of occurrences of the factor in the chain.  */
4421  HOST_WIDE_INT count;
4422
4423  /* An SSA name representing the product of this factor and
4424     all factors appearing later in the repeated factor vector.  */
4425  tree repr;
4426};
4427
4428typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4429typedef const struct repeat_factor_d *const_repeat_factor_t;
4430
4431
4432static vec<repeat_factor> repeat_factor_vec;
4433
4434/* Used for sorting the repeat factor vector.  Sort primarily by
4435   ascending occurrence count, secondarily by descending rank.  */
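/* E.g. (illustrative): factors with occurrence counts {z: 4, x: 2, y: 3}
   sort to x (2), y (3), z (4); ties in the count put the higher-ranked
   factor first.  */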
4436
4437static int
4438compare_repeat_factors (const void *x1, const void *x2)
4439{
4440  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4441  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4442
4443  if (rf1->count != rf2->count)
4444    return rf1->count - rf2->count;
4445
4446  return rf2->rank - rf1->rank;
4447}
4448
4449/* Look for repeated operands in OPS in the multiply tree rooted at
4450   STMT.  Replace them with an optimal sequence of multiplies and powi
4451   builtin calls, and remove the used operands from OPS.  Return an
4452   SSA name representing the value of the replacement sequence.  */
4453
4454static tree
4455attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4456{
4457  unsigned i, j, vec_len;
4458  int ii;
4459  operand_entry_t oe;
4460  repeat_factor_t rf1, rf2;
4461  repeat_factor rfnew;
4462  tree result = NULL_TREE;
4463  tree target_ssa, iter_result;
4464  tree type = TREE_TYPE (gimple_get_lhs (stmt));
4465  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4466  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4467  gimple mul_stmt, pow_stmt;
4468
4469  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4470     target.  */
4471  if (!powi_fndecl)
4472    return NULL_TREE;
4473
4474  /* Allocate the repeated factor vector.  */
4475  repeat_factor_vec.create (10);
4476
4477  /* Scan the OPS vector for all SSA names in the product and build
4478     up a vector of occurrence counts for each factor.  */
4479  FOR_EACH_VEC_ELT (*ops, i, oe)
4480    {
4481      if (TREE_CODE (oe->op) == SSA_NAME)
4482	{
4483	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4484	    {
4485	      if (rf1->factor == oe->op)
4486		{
4487		  rf1->count += oe->count;
4488		  break;
4489		}
4490	    }
4491
4492	  if (j >= repeat_factor_vec.length ())
4493	    {
4494	      rfnew.factor = oe->op;
4495	      rfnew.rank = oe->rank;
4496	      rfnew.count = oe->count;
4497	      rfnew.repr = NULL_TREE;
4498	      repeat_factor_vec.safe_push (rfnew);
4499	    }
4500	}
4501    }
4502
4503  /* Sort the repeated factor vector by (a) increasing occurrence count,
4504     and (b) decreasing rank.  */
4505  repeat_factor_vec.qsort (compare_repeat_factors);
4506
4507  /* It is generally best to combine as many base factors as possible
4508     into a product before applying __builtin_powi to the result.
4509     However, the sort order chosen for the repeated factor vector
4510     allows us to cache partial results for the product of the base
4511     factors for subsequent use.  When we already have a cached partial
4512     result from a previous iteration, it is best to make use of it
4513     before looking for another __builtin_pow opportunity.
4514
4515     As an example, consider x * x * y * y * y * z * z * z * z.
4516     We want to first compose the product x * y * z, raise it to the
4517     second power, then multiply this by y * z, and finally multiply
4518     by z.  This can be done in 5 multiplies provided we cache y * z
4519     for use in both expressions:
4520
4521        t1 = y * z
4522	t2 = t1 * x
4523	t3 = t2 * t2
4524	t4 = t1 * t3
4525	result = t4 * z
4526
4527     If we instead ignored the cached y * z and first multiplied by
4528     the __builtin_pow opportunity z * z, we would get the inferior:
4529
4530        t1 = y * z
4531	t2 = t1 * x
4532	t3 = t2 * t2
4533	t4 = z * z
4534	t5 = t3 * t4
4535        result = t5 * y  */
4536
4537  vec_len = repeat_factor_vec.length ();
4538
4539  /* Repeatedly look for opportunities to create a builtin_powi call.  */
4540  while (true)
4541    {
4542      HOST_WIDE_INT power;
4543
4544      /* First look for the largest cached product of factors from
4545	 preceding iterations.  If found, create a builtin_powi for
4546	 it if the minimum occurrence count for its factors is at
4547	 least 2, or just use this cached product as our next
4548	 multiplicand if the minimum occurrence count is 1.  */
4549      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4550	{
4551	  if (rf1->repr && rf1->count > 0)
4552	    break;
4553	}
4554
4555      if (j < vec_len)
4556	{
4557	  power = rf1->count;
4558
4559	  if (power == 1)
4560	    {
4561	      iter_result = rf1->repr;
4562
4563	      if (dump_file && (dump_flags & TDF_DETAILS))
4564		{
4565		  unsigned elt;
4566		  repeat_factor_t rf;
4567		  fputs ("Multiplying by cached product ", dump_file);
4568		  for (elt = j; elt < vec_len; elt++)
4569		    {
4570		      rf = &repeat_factor_vec[elt];
4571		      print_generic_expr (dump_file, rf->factor, 0);
4572		      if (elt < vec_len - 1)
4573			fputs (" * ", dump_file);
4574		    }
4575		  fputs ("\n", dump_file);
4576		}
4577	    }
4578	  else
4579	    {
4580	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4581	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4582					    build_int_cst (integer_type_node,
4583							   power));
4584	      gimple_call_set_lhs (pow_stmt, iter_result);
4585	      gimple_set_location (pow_stmt, gimple_location (stmt));
4586	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
4587	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4588
4589	      if (dump_file && (dump_flags & TDF_DETAILS))
4590		{
4591		  unsigned elt;
4592		  repeat_factor_t rf;
4593		  fputs ("Building __builtin_pow call for cached product (",
4594			 dump_file);
4595		  for (elt = j; elt < vec_len; elt++)
4596		    {
4597		      rf = &repeat_factor_vec[elt];
4598		      print_generic_expr (dump_file, rf->factor, 0);
4599		      if (elt < vec_len - 1)
4600			fputs (" * ", dump_file);
4601		    }
4602		  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4603			   power);
4604		}
4605	    }
4606	}
4607      else
4608	{
4609	  /* Otherwise, find the first factor in the repeated factor
4610	     vector whose occurrence count is at least 2.  If no such
4611	     factor exists, there are no builtin_powi opportunities
4612	     remaining.  */
4613	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4614	    {
4615	      if (rf1->count >= 2)
4616		break;
4617	    }
4618
4619	  if (j >= vec_len)
4620	    break;
4621
4622	  power = rf1->count;
4623
4624	  if (dump_file && (dump_flags & TDF_DETAILS))
4625	    {
4626	      unsigned elt;
4627	      repeat_factor_t rf;
4628	      fputs ("Building __builtin_pow call for (", dump_file);
4629	      for (elt = j; elt < vec_len; elt++)
4630		{
4631		  rf = &repeat_factor_vec[elt];
4632		  print_generic_expr (dump_file, rf->factor, 0);
4633		  if (elt < vec_len - 1)
4634		    fputs (" * ", dump_file);
4635		}
4636	      fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4637	    }
4638
4639	  reassociate_stats.pows_created++;
4640
4641	  /* Visit each element of the vector in reverse order (so that
4642	     high-occurrence elements are visited first, and within the
4643	     same occurrence count, lower-ranked elements are visited
4644	     first).  Form a linear product of all elements in this order
4645	     whose occurrence count is at least that of element J.
4646	     Record the SSA name representing the product of each element
4647	     with all subsequent elements in the vector.  */
4648	  if (j == vec_len - 1)
4649	    rf1->repr = rf1->factor;
4650	  else
4651	    {
4652	      for (ii = vec_len - 2; ii >= (int)j; ii--)
4653		{
4654		  tree op1, op2;
4655
4656		  rf1 = &repeat_factor_vec[ii];
4657		  rf2 = &repeat_factor_vec[ii + 1];
4658
4659		  /* Init the last factor's representative to be itself.  */
4660		  if (!rf2->repr)
4661		    rf2->repr = rf2->factor;
4662
4663		  op1 = rf1->factor;
4664		  op2 = rf2->repr;
4665
4666		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4667		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4668						  op1, op2);
4669		  gimple_set_location (mul_stmt, gimple_location (stmt));
4670		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
4671		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4672		  rf1->repr = target_ssa;
4673
4674		  /* Don't reprocess the multiply we just introduced.  */
4675		  gimple_set_visited (mul_stmt, true);
4676		}
4677	    }
4678
4679	  /* Form a call to __builtin_powi for the maximum product
4680	     just formed, raised to the power obtained earlier.  */
4681	  rf1 = &repeat_factor_vec[j];
4682	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4683	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4684					build_int_cst (integer_type_node,
4685						       power));
4686	  gimple_call_set_lhs (pow_stmt, iter_result);
4687	  gimple_set_location (pow_stmt, gimple_location (stmt));
4688	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
4689	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4690	}
4691
4692      /* If we previously formed at least one other builtin_powi call,
4693	 form the product of this one and those others.  */
4694      if (result)
4695	{
4696	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4697	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4698					  result, iter_result);
4699	  gimple_set_location (mul_stmt, gimple_location (stmt));
4700	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
4701	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4702	  gimple_set_visited (mul_stmt, true);
4703	  result = new_result;
4704	}
4705      else
4706	result = iter_result;
4707
4708      /* Decrement the occurrence count of each element in the product
4709	 by the count found above, and remove this many copies of each
4710	 factor from OPS.  */
4711      for (i = j; i < vec_len; i++)
4712	{
4713	  unsigned k = power;
4714	  unsigned n;
4715
4716	  rf1 = &repeat_factor_vec[i];
4717	  rf1->count -= power;
4718
4719	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4720	    {
4721	      if (oe->op == rf1->factor)
4722		{
4723		  if (oe->count <= k)
4724		    {
4725		      ops->ordered_remove (n);
4726		      k -= oe->count;
4727
4728		      if (k == 0)
4729			break;
4730		    }
4731		  else
4732		    {
4733		      oe->count -= k;
4734		      break;
4735		    }
4736		}
4737	    }
4738	}
4739    }
4740
4741  /* At this point all elements in the repeated factor vector have a
4742     remaining occurrence count of 0 or 1, and those with a count of 1
4743     don't have cached representatives.  Re-sort the ops vector and
4744     clean up.  */
4745  ops->qsort (sort_by_operand_rank);
4746  repeat_factor_vec.release ();
4747
4748  /* Return the final product computed herein.  Note that there may
4749     still be some elements with single occurrence count left in OPS;
4750     those will be handled by the normal reassociation logic.  */
4751  return result;
4752}
4753
4754/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4755
4756static void
4757transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4758{
4759  tree rhs1;
4760
4761  if (dump_file && (dump_flags & TDF_DETAILS))
4762    {
4763      fprintf (dump_file, "Transforming ");
4764      print_gimple_stmt (dump_file, stmt, 0, 0);
4765    }
4766
4767  rhs1 = gimple_assign_rhs1 (stmt);
4768  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4769  update_stmt (stmt);
4770  remove_visited_stmt_chain (rhs1);
4771
4772  if (dump_file && (dump_flags & TDF_DETAILS))
4773    {
4774      fprintf (dump_file, " into ");
4775      print_gimple_stmt (dump_file, stmt, 0, 0);
4776    }
4777}
4778
4779/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4780
4781static void
4782transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4783			    tree rhs1, tree rhs2)
4784{
4785  if (dump_file && (dump_flags & TDF_DETAILS))
4786    {
4787      fprintf (dump_file, "Transforming ");
4788      print_gimple_stmt (dump_file, stmt, 0, 0);
4789    }
4790
4791  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4792  update_stmt (gsi_stmt (*gsi));
4793  remove_visited_stmt_chain (rhs1);
4794
4795  if (dump_file && (dump_flags & TDF_DETAILS))
4796    {
4797      fprintf (dump_file, " into ");
4798      print_gimple_stmt (dump_file, stmt, 0, 0);
4799    }
4800}
4801
4802/* Reassociate expressions in basic block BB and its children in the
4803   post-dominator tree.  */
4804
4805static void
4806reassociate_bb (basic_block bb)
4807{
4808  gimple_stmt_iterator gsi;
4809  basic_block son;
4810  gimple stmt = last_stmt (bb);
4811
4812  if (stmt && !gimple_visited_p (stmt))
4813    maybe_optimize_range_tests (stmt);
4814
4815  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4816    {
4817      stmt = gsi_stmt (gsi);
4818
4819      if (is_gimple_assign (stmt)
4820	  && !stmt_could_throw_p (stmt))
4821	{
4822	  tree lhs, rhs1, rhs2;
4823	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4824
4825	  /* If this is not a gimple binary expression, there is
4826	     nothing for us to do with it.  */
4827	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4828	    continue;
4829
4830	  /* If this was part of an already processed statement,
4831	     we don't need to touch it again. */
4832	  if (gimple_visited_p (stmt))
4833	    {
4834	      /* This statement might have become dead because of previous
4835		 reassociations.  */
4836	      if (has_zero_uses (gimple_get_lhs (stmt)))
4837		{
4838		  reassoc_remove_stmt (&gsi);
4839		  release_defs (stmt);
4840		  /* We might end up removing the last stmt above which
4841		     places the iterator to the end of the sequence.
4842		     Reset it to the last stmt in this case which might
4843		     be the end of the sequence as well if we removed
4844		     the last statement of the sequence.  In which case
4845		     we need to bail out.  */
4846		  if (gsi_end_p (gsi))
4847		    {
4848		      gsi = gsi_last_bb (bb);
4849		      if (gsi_end_p (gsi))
4850			break;
4851		    }
4852		}
4853	      continue;
4854	    }
4855
4856	  lhs = gimple_assign_lhs (stmt);
4857	  rhs1 = gimple_assign_rhs1 (stmt);
4858	  rhs2 = gimple_assign_rhs2 (stmt);
4859
4860	  /* For non-bit or min/max operations we can't associate
4861	     all types.  Verify that here.  */
4862	  if (rhs_code != BIT_IOR_EXPR
4863	      && rhs_code != BIT_AND_EXPR
4864	      && rhs_code != BIT_XOR_EXPR
4865	      && rhs_code != MIN_EXPR
4866	      && rhs_code != MAX_EXPR
4867	      && (!can_reassociate_p (lhs)
4868		  || !can_reassociate_p (rhs1)
4869		  || !can_reassociate_p (rhs2)))
4870	    continue;
4871
4872	  if (associative_tree_code (rhs_code))
4873	    {
4874	      auto_vec<operand_entry_t> ops;
4875	      tree powi_result = NULL_TREE;
4876
4877	      /* There may be no immediate uses left by the time we
4878		 get here because we may have eliminated them all.  */
4879	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4880		continue;
4881
4882	      gimple_set_visited (stmt, true);
4883	      linearize_expr_tree (&ops, stmt, true, true);
4884	      ops.qsort (sort_by_operand_rank);
4885	      optimize_ops_list (rhs_code, &ops);
4886	      if (undistribute_ops_list (rhs_code, &ops,
4887					 loop_containing_stmt (stmt)))
4888		{
4889		  ops.qsort (sort_by_operand_rank);
4890		  optimize_ops_list (rhs_code, &ops);
4891		}
4892
4893	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4894		optimize_range_tests (rhs_code, &ops);
4895
4896	      if (first_pass_instance
4897		  && rhs_code == MULT_EXPR
4898		  && flag_unsafe_math_optimizations)
4899		powi_result = attempt_builtin_powi (stmt, &ops);
4900
4901	      /* If the operand vector is now empty, all operands were
4902		 consumed by the __builtin_powi optimization.  */
4903	      if (ops.length () == 0)
4904		transform_stmt_to_copy (&gsi, stmt, powi_result);
4905	      else if (ops.length () == 1)
4906		{
4907		  tree last_op = ops.last ()->op;
4908
4909		  if (powi_result)
4910		    transform_stmt_to_multiply (&gsi, stmt, last_op,
4911						powi_result);
4912		  else
4913		    transform_stmt_to_copy (&gsi, stmt, last_op);
4914		}
4915	      else
4916		{
4917		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4918		  int ops_num = ops.length ();
4919		  int width = get_reassociation_width (ops_num, rhs_code, mode);
4920		  tree new_lhs = lhs;
4921
4922		  if (dump_file && (dump_flags & TDF_DETAILS))
4923		    fprintf (dump_file,
4924			     "Width = %d was chosen for reassociation\n", width);
4925
4926		  if (width > 1
4927		      && ops.length () > 3)
4928		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4929						width, ops);
4930		  else
4931                    {
4932                      /* When there are three operands left, we want
4933                         to make sure the ones that get the double
4934                         binary op are chosen wisely.  */
4935                      int len = ops.length ();
4936                      if (len >= 3)
4937                        swap_ops_for_binary_stmt (ops, len - 3, stmt);
4938
4939		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
4940						   powi_result != NULL);
4941                    }
4942
4943		  /* If we combined some repeated factors into a
4944		     __builtin_powi call, multiply that result by the
4945		     reassociated operands.  */
4946		  if (powi_result)
4947		    {
4948		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4949		      tree type = TREE_TYPE (lhs);
4950		      tree target_ssa = make_temp_ssa_name (type, NULL,
4951							    "reassocpow");
4952		      gimple_set_lhs (lhs_stmt, target_ssa);
4953		      update_stmt (lhs_stmt);
4954		      if (lhs != new_lhs)
4955			target_ssa = new_lhs;
4956		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4957						      powi_result, target_ssa);
4958		      gimple_set_location (mul_stmt, gimple_location (stmt));
4959		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
4960		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4961		    }
4962		}
4963	    }
4964	}
4965    }
4966  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4967       son;
4968       son = next_dom_son (CDI_POST_DOMINATORS, son))
4969    reassociate_bb (son);
4970}
4971
4972/* Add jumps around shifts for range tests turned into bit tests.
4973   For each SSA_NAME VAR we have code like:
4974   VAR = ...; // final stmt of range comparison
4975   // bit test here...;
4976   OTHERVAR = ...; // final stmt of the bit test sequence
4977   RES = VAR | OTHERVAR;
4978   Turn the above into:
4979   VAR = ...;
4980   if (VAR != 0)
4981     goto <l3>;
4982   else
4983     goto <l2>;
4984   <l2>:
4985   // bit test here...;
4986   OTHERVAR = ...;
4987   <l3>:
4988   # RES = PHI<1(l1), OTHERVAR(l2)>;  */
4989
4990static void
4991branch_fixup (void)
4992{
4993  tree var;
4994  unsigned int i;
4995
4996  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4997    {
4998      gimple def_stmt = SSA_NAME_DEF_STMT (var);
4999      gimple use_stmt;
5000      use_operand_p use;
5001      bool ok = single_imm_use (var, &use, &use_stmt);
5002      gcc_assert (ok
5003		  && is_gimple_assign (use_stmt)
5004		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5005		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5006
5007      basic_block cond_bb = gimple_bb (def_stmt);
5008      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5009      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5010
5011      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5012      gimple g = gimple_build_cond (NE_EXPR, var,
5013				    build_zero_cst (TREE_TYPE (var)),
5014				    NULL_TREE, NULL_TREE);
5015      location_t loc = gimple_location (use_stmt);
5016      gimple_set_location (g, loc);
5017      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5018
5019      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5020      etrue->probability = REG_BR_PROB_BASE / 2;
5021      etrue->count = cond_bb->count / 2;
5022      edge efalse = find_edge (cond_bb, then_bb);
5023      efalse->flags = EDGE_FALSE_VALUE;
5024      efalse->probability -= etrue->probability;
5025      efalse->count -= etrue->count;
5026      then_bb->count -= etrue->count;
5027
5028      tree othervar = NULL_TREE;
5029      if (gimple_assign_rhs1 (use_stmt) == var)
5030	othervar = gimple_assign_rhs2 (use_stmt);
5031      else if (gimple_assign_rhs2 (use_stmt) == var)
5032	othervar = gimple_assign_rhs1 (use_stmt);
5033      else
5034	gcc_unreachable ();
5035      tree lhs = gimple_assign_lhs (use_stmt);
5036      gphi *phi = create_phi_node (lhs, merge_bb);
5037      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5038      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5039      gsi = gsi_for_stmt (use_stmt);
5040      gsi_remove (&gsi, true);
5041
5042      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5043      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5044    }
5045  reassoc_branch_fixups.release ();
5046}
5047
5048void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
5049void debug_ops_vector (vec<operand_entry_t> ops);
5050
5051/* Dump the operand entry vector OPS to FILE.  */
5052
5053void
5054dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
5055{
5056  operand_entry_t oe;
5057  unsigned int i;
5058
5059  FOR_EACH_VEC_ELT (ops, i, oe)
5060    {
5061      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5062      print_generic_expr (file, oe->op, 0);
5063    }
5064}
5065
5066/* Dump the operand entry vector OPS to STDERR.  */
5067
5068DEBUG_FUNCTION void
5069debug_ops_vector (vec<operand_entry_t> ops)
5070{
5071  dump_ops_vector (stderr, ops);
5072}
5073
5074static void
5075do_reassoc (void)
5076{
5077  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5078  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5079}
5080
5081/* Initialize the reassociation pass.  */
5082
5083static void
5084init_reassoc (void)
5085{
5086  int i;
5087  long rank = 2;
5088  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5089
5090  /* Find the loops, so that we can prevent moving calculations in
5091     them.  */
5092  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5093
5094  memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5095
5096  operand_entry_pool = create_alloc_pool ("operand entry pool",
5097					  sizeof (struct operand_entry), 30);
5098  next_operand_entry_id = 0;
5099
5100  /* Reverse RPO (Reverse Post Order) will give us something where
5101     deeper loops come later.  */
5102  pre_and_rev_post_order_compute (NULL, bbs, false);
5103  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5104  operand_rank = new hash_map<tree, long>;
5105
5106  /* Give each default definition a distinct rank.  This includes
5107     parameters and the static chain.  Walk backwards over all
5108     SSA names so that we get proper rank ordering according
5109     to tree_swap_operands_p.  */
5110  for (i = num_ssa_names - 1; i > 0; --i)
5111    {
5112      tree name = ssa_name (i);
5113      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5114	insert_operand_rank (name, ++rank);
5115    }
5116
5117  /* Set up rank for each BB.  */
5118  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5119    bb_rank[bbs[i]] = ++rank << 16;
5120
5121  free (bbs);
5122  calculate_dominance_info (CDI_POST_DOMINATORS);
5123  plus_negates = vNULL;
5124}
5125
5126/* Cleanup after the reassociation pass, and print stats if
5127   requested.  */
5128
5129static void
5130fini_reassoc (void)
5131{
5132  statistics_counter_event (cfun, "Linearized",
5133			    reassociate_stats.linearized);
5134  statistics_counter_event (cfun, "Constants eliminated",
5135			    reassociate_stats.constants_eliminated);
5136  statistics_counter_event (cfun, "Ops eliminated",
5137			    reassociate_stats.ops_eliminated);
5138  statistics_counter_event (cfun, "Statements rewritten",
5139			    reassociate_stats.rewritten);
5140  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5141			    reassociate_stats.pows_encountered);
5142  statistics_counter_event (cfun, "Built-in powi calls created",
5143			    reassociate_stats.pows_created);
5144
5145  delete operand_rank;
5146  free_alloc_pool (operand_entry_pool);
5147  free (bb_rank);
5148  plus_negates.release ();
5149  free_dominance_info (CDI_POST_DOMINATORS);
5150  loop_optimizer_finalize ();
5151}
5152
5153/* Gate and execute functions for Reassociation.  */
5154
5155static unsigned int
5156execute_reassoc (void)
5157{
5158  init_reassoc ();
5159
5160  do_reassoc ();
5161  repropagate_negates ();
5162  branch_fixup ();
5163
5164  fini_reassoc ();
5165  return 0;
5166}
5167
5168namespace {
5169
5170const pass_data pass_data_reassoc =
5171{
5172  GIMPLE_PASS, /* type */
5173  "reassoc", /* name */
5174  OPTGROUP_NONE, /* optinfo_flags */
5175  TV_TREE_REASSOC, /* tv_id */
5176  ( PROP_cfg | PROP_ssa ), /* properties_required */
5177  0, /* properties_provided */
5178  0, /* properties_destroyed */
5179  0, /* todo_flags_start */
5180  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5181};
5182
5183class pass_reassoc : public gimple_opt_pass
5184{
5185public:
5186  pass_reassoc (gcc::context *ctxt)
5187    : gimple_opt_pass (pass_data_reassoc, ctxt)
5188  {}
5189
5190  /* opt_pass methods: */
5191  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5192  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5193  virtual unsigned int execute (function *) { return execute_reassoc (); }
5194
5195}; // class pass_reassoc
5196
5197} // anon namespace
5198
5199gimple_opt_pass *
5200make_pass_reassoc (gcc::context *ctxt)
5201{
5202  return new pass_reassoc (ctxt);
5203}
5204