/* Scalar Replacement of Aggregates (SRA) converts some structure
   references into scalar references, exposing them to the scalar
   optimizers.
   Copyright (C) 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Diego Novillo <dnovillo@redhat.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"

/* These RTL headers are needed for basic-block.h.  */
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "langhooks.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-gimple.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
#include "flags.h"
#include "bitmap.h"
#include "obstack.h"
#include "target.h"
/* expr.h is needed for MOVE_RATIO.  */
#include "expr.h"
#include "params.h"


/* The objective of this pass is to replace a non-addressable aggregate with a
   set of independent variables.  Most of the time, all of these variables
   will be scalars.  But a secondary objective is to break up larger
   aggregates into smaller aggregates.  In the process we may find that some
   bits of the larger aggregate can be deleted as unreferenced.

   This substitution is done globally.  More localized substitutions would
   be the purview of a load-store motion pass.

   The optimization proceeds in phases:

     (1) Identify variables that have types that are candidates for
	 decomposition.

     (2) Scan the function looking for the ways these variables are used.
	 In particular we're interested in the number of times a variable
	 (or member) is needed as a complete unit, and the number of times
	 a variable (or member) is copied.

     (3) Based on the usage profile, instantiate substitution variables.

     (4) Scan the function making replacements.
*/
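
/* For illustration only (not part of the pass): assuming a hypothetical
   candidate type "struct pair { int x; int y; }", the four phases above
   turn

	struct pair p, q;
	p = q;
	return p.x + p.y;

   into something equivalent to

	int p$x, p$y, q$x, q$y;
	p$x = q$x;
	p$y = q$y;
	return p$x + p$y;

   after which the scalar optimizers can operate on p$x and p$y
   directly.  */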


/* The set of todo flags to return from tree_sra.  */
static unsigned int todoflags;

/* The set of aggregate variables that are candidates for scalarization.  */
static bitmap sra_candidates;

/* Set of scalarizable PARM_DECLs that need copy-in operations at the
   beginning of the function.  */
static bitmap needs_copy_in;

/* Sets of bit pairs that cache type decomposition and instantiation.  */
static bitmap sra_type_decomp_cache;
static bitmap sra_type_inst_cache;

/* One of these structures is created for each candidate aggregate and
   each (accessed) member or group of members of such an aggregate.  */
struct sra_elt
{
  /* A tree of the elements.  Used when we want to traverse everything.  */
  struct sra_elt *parent;
  struct sra_elt *groups;
  struct sra_elt *children;
  struct sra_elt *sibling;

  /* If this element is a root, then this is the VAR_DECL.  If this is
     a sub-element, this is some token used to identify the reference.
     In the case of COMPONENT_REF, this is the FIELD_DECL.  In the case
     of an ARRAY_REF, this is the (constant) index.  In the case of an
     ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR.  In the case
     of a complex number, this is a zero or one.  */
  tree element;

  /* The type of the element.  */
  tree type;

  /* A VAR_DECL, for any sub-element we've decided to replace.  */
  tree replacement;

  /* The number of times the element is referenced as a whole.  I.e.
     given "a.b.c", this would be incremented for C, but not for A or B.  */
  unsigned int n_uses;

  /* The number of times the element is copied to or from another
     scalarizable element.  */
  unsigned int n_copies;

  /* True if TYPE is scalar.  */
  bool is_scalar;

  /* True if this element is a group of members of its parent.  */
  bool is_group;

  /* True if we saw something about this element that prevents scalarization,
     such as non-constant indexing.  */
  bool cannot_scalarize;

  /* True if we've decided that structure-to-structure assignment
     should happen via memcpy and not per-element.  */
  bool use_block_copy;

  /* True if everything under this element has been marked TREE_NO_WARNING.  */
  bool all_no_warning;

  /* A flag for use with/after random access traversals.  */
  bool visited;
};
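
/* For illustration only: given a hypothetical candidate
   "struct { int i; _Complex float c; } x;", the element tree would be

	x		(root; element is the VAR_DECL)
	  x.i		(element is the FIELD_DECL for "i")
	  x.c		(element is the FIELD_DECL for "c")
	    real	(element is integer_zero_node)
	    imag	(element is integer_one_node)

   Each node may be given a scalar replacement VAR_DECL in phase 3.  */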

#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)

#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT)			\
  for ((CHILD) = (ELT)->is_group				\
		 ? next_child_for_group (NULL, (ELT))		\
		 : (ELT)->children;				\
       (CHILD);							\
       (CHILD) = (ELT)->is_group				\
		 ? next_child_for_group ((CHILD), (ELT))	\
		 : (CHILD)->sibling)

/* Helper function for above macro.  Return next child in group.  */
static struct sra_elt *
next_child_for_group (struct sra_elt *child, struct sra_elt *group)
{
  gcc_assert (group->is_group);

  /* Find the next child in the parent.  */
  if (child)
    child = child->sibling;
  else
    child = group->parent->children;

  /* Skip siblings that do not belong to the group.  */
  while (child)
    {
      tree g_elt = group->element;
      if (TREE_CODE (g_elt) == RANGE_EXPR)
	{
	  if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0))
	      && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element))
	    break;
	}
      else
	gcc_unreachable ();

      child = child->sibling;
    }

  return child;
}

/* Random access to the child of a parent is performed by hashing.
   This prevents quadratic behavior, and allows SRA to function
   reasonably on larger records.  */
static htab_t sra_map;

/* All structures are allocated out of the following obstack.  */
static struct obstack sra_obstack;

/* Debugging functions.  */
static void dump_sra_elt_name (FILE *, struct sra_elt *);
extern void debug_sra_elt_name (struct sra_elt *);

/* Forward declarations.  */
static tree generate_element_ref (struct sra_elt *);

/* Return true if DECL is an SRA candidate.  */

static bool
is_sra_candidate_decl (tree decl)
{
  return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
}

/* Return true if TYPE is a scalar type.  */

static bool
is_sra_scalar_type (tree type)
{
  enum tree_code code = TREE_CODE (type);
  return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
	  || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
	  || code == POINTER_TYPE || code == OFFSET_TYPE
	  || code == REFERENCE_TYPE);
}

/* Return true if TYPE can be decomposed into a set of independent variables.

   Note that this doesn't imply that all elements of TYPE can be
   instantiated, just that if we decide to break up the type into
   separate pieces that it can be done.  */
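
/* For illustration only: a RECORD_TYPE such as "struct { int a; float b; }"
   can be decomposed; a union, a variable-sized array, or a record whose
   bit-field size does not match its type's precision cannot.  The two
   cache bits per type remember a previous "yes" (bit 2*UID+0) or "no"
   (bit 2*UID+1) answer.  */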

bool
sra_type_can_be_decomposed_p (tree type)
{
  unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
  tree t;

  /* Avoid searching the same type twice.  */
  if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
    return true;
  if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
    return false;

  /* The type must have a definite nonzero size.  */
  if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
      || integer_zerop (TYPE_SIZE (type)))
    goto fail;

  /* The type must be a non-union aggregate.  */
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	bool saw_one_field = false;

	for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
	  if (TREE_CODE (t) == FIELD_DECL)
	    {
	      /* Reject incorrectly represented bit fields.  */
	      if (DECL_BIT_FIELD (t)
		  && (tree_low_cst (DECL_SIZE (t), 1)
		      != TYPE_PRECISION (TREE_TYPE (t))))
		goto fail;

	      saw_one_field = true;
	    }

	/* Record types must have at least one field.  */
	if (!saw_one_field)
	  goto fail;
      }
      break;

    case ARRAY_TYPE:
      /* Array types must have a fixed lower and upper bound.  */
      t = TYPE_DOMAIN (type);
      if (t == NULL)
	goto fail;
      if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
	goto fail;
      if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
	goto fail;
      break;

    case COMPLEX_TYPE:
      break;

    default:
      goto fail;
    }

  bitmap_set_bit (sra_type_decomp_cache, cache+0);
  return true;

 fail:
  bitmap_set_bit (sra_type_decomp_cache, cache+1);
  return false;
}

/* Return true if DECL can be decomposed into a set of independent
   (though not necessarily scalar) variables.  */

static bool
decl_can_be_decomposed_p (tree var)
{
  /* Early out for scalars.  */
  if (is_sra_scalar_type (TREE_TYPE (var)))
    return false;

  /* The variable must not be aliased.  */
  if (!is_gimple_non_addressable (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because it must live in memory\n");
	}
      return false;
    }

  /* The variable must not be volatile.  */
  if (TREE_THIS_VOLATILE (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because it is declared volatile\n");
	}
      return false;
    }

  /* We must be able to decompose the variable's type.  */
  if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because its type cannot be decomposed\n");
	}
      return false;
    }

  return true;
}

/* Return true if TYPE can be *completely* decomposed into scalars.  */

static bool
type_can_instantiate_all_elements (tree type)
{
  if (is_sra_scalar_type (type))
    return true;
  if (!sra_type_can_be_decomposed_p (type))
    return false;

  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
	tree f;

	if (bitmap_bit_p (sra_type_inst_cache, cache+0))
	  return true;
	if (bitmap_bit_p (sra_type_inst_cache, cache+1))
	  return false;

	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
	  if (TREE_CODE (f) == FIELD_DECL)
	    {
	      if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
		{
		  bitmap_set_bit (sra_type_inst_cache, cache+1);
		  return false;
		}
	    }

	bitmap_set_bit (sra_type_inst_cache, cache+0);
	return true;
      }

    case ARRAY_TYPE:
      return type_can_instantiate_all_elements (TREE_TYPE (type));

    case COMPLEX_TYPE:
      return true;

    default:
      gcc_unreachable ();
    }
}

/* Test whether ELT or some sub-element cannot be scalarized.  */

static bool
can_completely_scalarize_p (struct sra_elt *elt)
{
  struct sra_elt *c;

  if (elt->cannot_scalarize)
    return false;

  for (c = elt->children; c; c = c->sibling)
    if (!can_completely_scalarize_p (c))
      return false;

  for (c = elt->groups; c; c = c->sibling)
    if (!can_completely_scalarize_p (c))
      return false;

  return true;
}


/* A simplified tree hashing algorithm that only handles the types of
   trees we expect to find in sra_elt->element.  */

static hashval_t
sra_hash_tree (tree t)
{
  hashval_t h;

  switch (TREE_CODE (t))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      h = DECL_UID (t);
      break;

    case INTEGER_CST:
      h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
      break;

    case RANGE_EXPR:
      h = iterative_hash_expr (TREE_OPERAND (t, 0), 0);
      h = iterative_hash_expr (TREE_OPERAND (t, 1), h);
      break;

    case FIELD_DECL:
      /* We can have types that are compatible, but have different member
	 lists, so we can't hash fields by ID.  Use offsets instead.  */
      h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
      h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
      break;

    default:
      gcc_unreachable ();
    }

  return h;
}

/* Hash function for type SRA_PAIR.  */

static hashval_t
sra_elt_hash (const void *x)
{
  const struct sra_elt *e = x;
  const struct sra_elt *p;
  hashval_t h;

  h = sra_hash_tree (e->element);

  /* Take into account everything back up the chain.  Given that chain
     lengths are rarely very long, this should be acceptable.  If we
     truly identify this as a performance problem, it should work to
     hash the pointer value "e->parent".  */
  for (p = e->parent; p ; p = p->parent)
    h = (h * 65521) ^ sra_hash_tree (p->element);

  return h;
}

/* Equality function for type SRA_PAIR.  */

static int
sra_elt_eq (const void *x, const void *y)
{
  const struct sra_elt *a = x;
  const struct sra_elt *b = y;
  tree ae, be;

  if (a->parent != b->parent)
    return false;

  ae = a->element;
  be = b->element;

  if (ae == be)
    return true;
  if (TREE_CODE (ae) != TREE_CODE (be))
    return false;

  switch (TREE_CODE (ae))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      /* These are all pointer unique.  */
      return false;

    case INTEGER_CST:
      /* Integers are not pointer unique, so compare their values.  */
      return tree_int_cst_equal (ae, be);

    case RANGE_EXPR:
      return
	tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0))
	&& tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1));

    case FIELD_DECL:
      /* Fields are unique within a record, but not between
	 compatible records.  */
      if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
	return false;
      return fields_compatible_p (ae, be);

    default:
      gcc_unreachable ();
    }
}

/* Create or return the SRA_ELT structure for CHILD in PARENT.  PARENT
   may be null, in which case CHILD must be a DECL.  */

static struct sra_elt *
lookup_element (struct sra_elt *parent, tree child, tree type,
		enum insert_option insert)
{
  struct sra_elt dummy;
  struct sra_elt **slot;
  struct sra_elt *elt;

  if (parent)
    dummy.parent = parent->is_group ? parent->parent : parent;
  else
    dummy.parent = NULL;
  dummy.element = child;

  slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
  if (!slot && insert == NO_INSERT)
    return NULL;

  elt = *slot;
  if (!elt && insert == INSERT)
    {
      *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
      memset (elt, 0, sizeof (*elt));

      elt->parent = parent;
      elt->element = child;
      elt->type = type;
      elt->is_scalar = is_sra_scalar_type (type);

      if (parent)
	{
	  if (IS_ELEMENT_FOR_GROUP (elt->element))
	    {
	      elt->is_group = true;
	      elt->sibling = parent->groups;
	      parent->groups = elt;
	    }
	  else
	    {
	      elt->sibling = parent->children;
	      parent->children = elt;
	    }
	}

      /* If this is a parameter, then if we want to scalarize, we have
	 one copy from the true function parameter.  Count it now.  */
      if (TREE_CODE (child) == PARM_DECL)
	{
	  elt->n_copies = 1;
	  bitmap_set_bit (needs_copy_in, DECL_UID (child));
	}
    }

  return elt;
}

/* Create or return the SRA_ELT structure for EXPR if the expression
   refers to a scalarizable variable.  */

static struct sra_elt *
maybe_lookup_element_for_expr (tree expr)
{
  struct sra_elt *elt;
  tree child;

  switch (TREE_CODE (expr))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      if (is_sra_candidate_decl (expr))
	return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
      return NULL;

    case ARRAY_REF:
      /* We can't scalarize variable array indices.  */
      if (in_array_bounds_p (expr))
	child = TREE_OPERAND (expr, 1);
      else
	return NULL;
      break;

    case ARRAY_RANGE_REF:
      /* We can't scalarize variable array indices.  */
      if (range_in_array_bounds_p (expr))
	{
	  tree domain = TYPE_DOMAIN (TREE_TYPE (expr));
	  child = build2 (RANGE_EXPR, integer_type_node,
			  TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain));
	}
      else
	return NULL;
      break;

    case COMPONENT_REF:
      /* Don't look through unions.  */
      if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
	return NULL;
      child = TREE_OPERAND (expr, 1);
      break;

    case REALPART_EXPR:
      child = integer_zero_node;
      break;
    case IMAGPART_EXPR:
      child = integer_one_node;
      break;

    default:
      return NULL;
    }

  elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
  if (elt)
    return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
  return NULL;
}


/* Functions to walk just enough of the tree to see all scalarizable
   references, and categorize them.  */

/* A set of callbacks for phases 2 and 4.  They'll be invoked for the
   various kinds of references seen.  In all cases, *BSI is an iterator
   pointing to the statement being processed.  */
struct sra_walk_fns
{
  /* Invoked when ELT is required as a unit.  Note that ELT might refer to
     a leaf node, in which case this is a simple scalar reference.  *EXPR_P
     points to the location of the expression.  IS_OUTPUT is true if this
     is a left-hand-side reference.  USE_ALL is true if we saw something we
     couldn't quite identify and had to force the use of the entire object.  */
  void (*use) (struct sra_elt *elt, tree *expr_p,
	       block_stmt_iterator *bsi, bool is_output, bool use_all);

  /* Invoked when we have a copy between two scalarizable references.  */
  void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		block_stmt_iterator *bsi);

  /* Invoked when ELT is initialized from a constant.  VALUE may be NULL,
     in which case it should be treated as an empty CONSTRUCTOR.  */
  void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);

  /* Invoked when we have a copy between one scalarizable reference ELT
     and one non-scalarizable reference OTHER without side-effects.
     IS_OUTPUT is true if ELT is on the left-hand side.  */
  void (*ldst) (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output);

  /* True during phase 2, false during phase 4.  */
  /* ??? This is a hack.  */
  bool initial_scan;
};
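
/* For illustration only, assuming candidate aggregates "a" and "b" of a
   hypothetical struct type and a non-candidate pointer "p":

	a.x = a.y	invokes USE twice (once per side);
	a = b		invokes COPY;
	a = {}		invokes INIT;
	a = *p		invokes LDST with IS_OUTPUT true.  */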

#ifdef ENABLE_CHECKING
/* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */

static tree
sra_find_candidate_decl (tree *tp, int *walk_subtrees,
			 void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;
  enum tree_code code = TREE_CODE (t);

  if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
    {
      *walk_subtrees = 0;
      if (is_sra_candidate_decl (t))
	return t;
    }
  else if (TYPE_P (t))
    *walk_subtrees = 0;

  return NULL;
}
#endif

/* Walk most expressions looking for a scalarizable aggregate.
   If we find one, invoke FNS->USE.  */

static void
sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
	       const struct sra_walk_fns *fns)
{
  tree expr = *expr_p;
  tree inner = expr;
  bool disable_scalarization = false;
  bool use_all_p = false;

  /* We're looking to collect a reference expression between EXPR and INNER,
     such that INNER is a scalarizable decl and all other nodes through EXPR
     are references that we can scalarize.  If we come across something that
     we can't scalarize, we reset EXPR.  This has the effect of making it
     appear that we're referring to the larger expression as a whole.  */
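
  /* For illustration only: for a candidate "a" and the expression
     "a.b[2].c", INNER steps down through the COMPONENT_REF and ARRAY_REF
     nodes until it reaches the decl "a", and USE is then invoked on the
     element for the whole path.  For "a.b[i].c" with a non-constant "i",
     scalarization of "a" is disabled instead.  */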

  while (1)
    switch (TREE_CODE (inner))
      {
      case VAR_DECL:
      case PARM_DECL:
      case RESULT_DECL:
	/* If there is a scalarizable decl at the bottom, then process it.  */
	if (is_sra_candidate_decl (inner))
	  {
	    struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
	    if (disable_scalarization)
	      elt->cannot_scalarize = true;
	    else
	      fns->use (elt, expr_p, bsi, is_output, use_all_p);
	  }
	return;

      case ARRAY_REF:
	/* Non-constant index means any member may be accessed.  Prevent the
	   expression from being scalarized.  If we were to treat this as a
	   reference to the whole array, we can wind up with a single dynamic
	   index reference inside a loop being overridden by several constant
	   index references during loop setup.  It's possible that this could
	   be avoided by using dynamic usage counts based on BB trip counts
	   (based on loop analysis or profiling), but that hardly seems worth
	   the effort.  */
	/* ??? Hack.  Figure out how to push this into the scan routines
	   without duplicating too much code.  */
	if (!in_array_bounds_p (inner))
	  {
	    disable_scalarization = true;
	    goto use_all;
	  }
	/* ??? Are we assured that non-constant bounds and stride will have
	   the same value everywhere?  I don't think Fortran will...  */
	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
	  goto use_all;
	inner = TREE_OPERAND (inner, 0);
	break;

      case ARRAY_RANGE_REF:
	if (!range_in_array_bounds_p (inner))
	  {
	    disable_scalarization = true;
	    goto use_all;
	  }
	/* ??? See above re non-constant bounds and stride.  */
	if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
	  goto use_all;
	inner = TREE_OPERAND (inner, 0);
	break;

      case COMPONENT_REF:
	/* A reference to a union member constitutes a reference to the
	   entire union.  */
	if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
	  goto use_all;
	/* ??? See above re non-constant stride.  */
	if (TREE_OPERAND (inner, 2))
	  goto use_all;
	inner = TREE_OPERAND (inner, 0);
	break;

      case REALPART_EXPR:
      case IMAGPART_EXPR:
	inner = TREE_OPERAND (inner, 0);
	break;

      case BIT_FIELD_REF:
	/* A bit field reference (access to *multiple* fields simultaneously)
	   is not currently scalarized.  Consider this an access to the
	   complete outer element, to which walk_tree will bring us next.  */
	goto use_all;

      case VIEW_CONVERT_EXPR:
      case NOP_EXPR:
	/* Similarly, a view/nop explicitly wants to look at an object in a
	   type other than the one we've scalarized.  */
	goto use_all;

      case WITH_SIZE_EXPR:
	/* This is a transparent wrapper.  The entire inner expression really
	   is being used.  */
	goto use_all;

      use_all:
	expr_p = &TREE_OPERAND (inner, 0);
	inner = expr = *expr_p;
	use_all_p = true;
	break;

      default:
#ifdef ENABLE_CHECKING
	/* Validate that we're not missing any references.  */
	gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
#endif
	return;
      }
}

/* Walk a TREE_LIST of values looking for scalarizable aggregates.
   If we find one, invoke FNS->USE.  */

static void
sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
		    const struct sra_walk_fns *fns)
{
  tree op;
  for (op = list; op ; op = TREE_CHAIN (op))
    sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
}

/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
   If we find one, invoke FNS->USE.  */

static void
sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
		    const struct sra_walk_fns *fns)
{
  sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns);
}

/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
   aggregates.  If we find one, invoke FNS->USE.  */

static void
sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
		   const struct sra_walk_fns *fns)
{
  sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
  sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
}

/* Walk a MODIFY_EXPR and categorize the assignment appropriately.  */

static void
sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi,
		      const struct sra_walk_fns *fns)
{
  struct sra_elt *lhs_elt, *rhs_elt;
  tree lhs, rhs;

  lhs = TREE_OPERAND (expr, 0);
  rhs = TREE_OPERAND (expr, 1);
  lhs_elt = maybe_lookup_element_for_expr (lhs);
  rhs_elt = maybe_lookup_element_for_expr (rhs);

  /* If both sides are scalarizable, this is a COPY operation.  */
  if (lhs_elt && rhs_elt)
    {
      fns->copy (lhs_elt, rhs_elt, bsi);
      return;
    }

  /* If the RHS is scalarizable, handle it.  There are only two cases.  */
  if (rhs_elt)
    {
      if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
	fns->ldst (rhs_elt, lhs, bsi, false);
      else
	fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false, false);
    }

  /* If it isn't scalarizable, there may be scalarizable variables within, so
     check for a call or else walk the RHS to see if we need to do any
     copy-in operations.  We need to do it before the LHS is scalarized so
     that the statements get inserted in the proper place, before any
     copy-out operations.  */
  else
    {
      tree call = get_call_expr_in (rhs);
      if (call)
	sra_walk_call_expr (call, bsi, fns);
      else
	sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns);
    }

  /* Likewise, handle the LHS being scalarizable.  We have cases similar
     to those above, but also want to handle RHS being constant.  */
  if (lhs_elt)
    {
      /* If this is an assignment from a constant, or constructor, then
	 we have access to all of the elements individually.  Invoke INIT.  */
      if (TREE_CODE (rhs) == COMPLEX_EXPR
	  || TREE_CODE (rhs) == COMPLEX_CST
	  || TREE_CODE (rhs) == CONSTRUCTOR)
	fns->init (lhs_elt, rhs, bsi);

      /* If this is an assignment from read-only memory, treat this as if
	 we'd been passed the constructor directly.  Invoke INIT.  */
      else if (TREE_CODE (rhs) == VAR_DECL
	       && TREE_STATIC (rhs)
	       && TREE_READONLY (rhs)
	       && targetm.binds_local_p (rhs))
	fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);

      /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
	 The lvalue requirement prevents us from trying to directly scalarize
	 the result of a function call.  Which would result in trying to call
	 the function multiple times, and other evil things.  */
      else if (!lhs_elt->is_scalar
	       && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
	fns->ldst (lhs_elt, rhs, bsi, true);

      /* Otherwise we're being used in some context that requires the
	 aggregate to be seen as a whole.  Invoke USE.  */
      else
	fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true, false);
    }

  /* Similarly to above, LHS_ELT being null only means that the LHS as a
     whole is not a scalarizable reference.  There may be occurrences of
     scalarizable variables within, which implies a USE.  */
  else
    sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns);
}

/* Entry point to the walk functions.  Search the entire function,
   invoking the callbacks in FNS on each of the references to
   scalarizable variables.  */

static void
sra_walk_function (const struct sra_walk_fns *fns)
{
  basic_block bb;
  block_stmt_iterator si, ni;

  /* ??? Phase 4 could derive some benefit from walking the function in
     dominator tree order.  */

  FOR_EACH_BB (bb)
    for (si = bsi_start (bb); !bsi_end_p (si); si = ni)
      {
	tree stmt, t;
	stmt_ann_t ann;

	stmt = bsi_stmt (si);
	ann = stmt_ann (stmt);

	ni = si;
	bsi_next (&ni);

	/* If the statement has no virtual operands, then it doesn't
	   make any structure references that we care about.  */
	if (ZERO_SSA_OPERANDS (stmt, (SSA_OP_VIRTUAL_DEFS | SSA_OP_VUSE)))
	  continue;

	switch (TREE_CODE (stmt))
	  {
	  case RETURN_EXPR:
	    /* If we have "return <retval>" then the return value is
	       already exposed for our pleasure.  Walk it as a USE to
	       force all the components back in place for the return.

	       If we have an embedded assignment, then <retval> is of
	       a type that gets returned in registers in this ABI, and
	       we do not wish to extend their lifetimes.  Treat this
	       as a USE of the variable on the RHS of this assignment.  */

	    t = TREE_OPERAND (stmt, 0);
	    if (TREE_CODE (t) == MODIFY_EXPR)
	      sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns);
	    else
	      sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
	    break;

	  case MODIFY_EXPR:
	    sra_walk_modify_expr (stmt, &si, fns);
	    break;
	  case CALL_EXPR:
	    sra_walk_call_expr (stmt, &si, fns);
	    break;
	  case ASM_EXPR:
	    sra_walk_asm_expr (stmt, &si, fns);
	    break;

	  default:
	    break;
	  }
      }
}

/* Phase One: Scan all referenced variables in the program looking for
   structures that could be decomposed.  */

static bool
find_candidates_for_sra (void)
{
  bool any_set = false;
  tree var;
  referenced_var_iterator rvi;

  FOR_EACH_REFERENCED_VAR (var, rvi)
    {
      if (decl_can_be_decomposed_p (var))
	{
	  bitmap_set_bit (sra_candidates, DECL_UID (var));
	  any_set = true;
	}
    }

  return any_set;
}


/* Phase Two: Scan all references to scalarizable variables.  Count the
   number of times they are used or copied respectively.  */

/* Callbacks to fill in SRA_WALK_FNS.  Everything but USE is
   considered a copy, because we can decompose the reference such that
   the sub-elements needn't be contiguous.  */

static void
scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
	  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
	  bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
{
  elt->n_uses += 1;
}

static void
scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
{
  lhs_elt->n_copies += 1;
  rhs_elt->n_copies += 1;
}

static void
scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
{
  lhs_elt->n_copies += 1;
}

static void
scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
	   block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
	   bool is_output ATTRIBUTE_UNUSED)
{
  elt->n_copies += 1;
}

/* Dump the values we collected during the scanning phase.  */

static void
scan_dump (struct sra_elt *elt)
{
  struct sra_elt *c;

  dump_sra_elt_name (dump_file, elt);
  fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);

  for (c = elt->children; c ; c = c->sibling)
    scan_dump (c);

  for (c = elt->groups; c ; c = c->sibling)
    scan_dump (c);
}

/* Entry point to phase 2.  Scan the entire function, building up
   scalarization data structures, recording copies and uses.  */

static void
scan_function (void)
{
  static const struct sra_walk_fns fns = {
    scan_use, scan_copy, scan_init, scan_ldst, true
  };
  bitmap_iterator bi;

  sra_walk_function (&fns);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fputs ("\nScan results:\n", dump_file);
      EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
	{
	  tree var = referenced_var (i);
	  struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
	  if (elt)
	    scan_dump (elt);
	}
      fputc ('\n', dump_file);
    }
}

/* Phase Three: Make decisions about which variables to scalarize, if any.
   All elements to be scalarized have replacement variables made for them.  */

/* A subroutine of build_element_name.  Recursively build the element
   name on the obstack.  */

static void
build_element_name_1 (struct sra_elt *elt)
{
  tree t;
  char buffer[32];

  if (elt->parent)
    {
      build_element_name_1 (elt->parent);
      obstack_1grow (&sra_obstack, '$');

      if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
	{
	  if (elt->element == integer_zero_node)
	    obstack_grow (&sra_obstack, "real", 4);
	  else
	    obstack_grow (&sra_obstack, "imag", 4);
	  return;
	}
    }

  t = elt->element;
  if (TREE_CODE (t) == INTEGER_CST)
    {
      /* ??? Eh.  Don't bother doing double-wide printing.  */
      sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
      obstack_grow (&sra_obstack, buffer, strlen (buffer));
    }
  else
    {
      tree name = DECL_NAME (t);
      if (name)
	obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
		      IDENTIFIER_LENGTH (name));
      else
	{
	  sprintf (buffer, "D%u", DECL_UID (t));
	  obstack_grow (&sra_obstack, buffer, strlen (buffer));
	}
    }
}

/* Construct a pretty variable name for an element's replacement variable.
   The name is built on the obstack.  */

static char *
build_element_name (struct sra_elt *elt)
{
  build_element_name_1 (elt);
  obstack_1grow (&sra_obstack, '\0');
  return XOBFINISH (&sra_obstack, char *);
}
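
/* For illustration only: for an element reached as "x.f[3]" the name
   built above would be "x$f$3", and for the imaginary part of a complex
   member "x.c" it would be "x$c$imag"; unnamed decls fall back to their
   UIDs, e.g. "D1234$f".  */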

/* Instantiate an element as an independent variable.  */

static void
instantiate_element (struct sra_elt *elt)
{
  struct sra_elt *base_elt;
  tree var, base;

  for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
    continue;
  base = base_elt->element;

  elt->replacement = var = make_rename_temp (elt->type, "SR");
  DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
  DECL_ARTIFICIAL (var) = 1;

  if (TREE_THIS_VOLATILE (elt->type))
    {
      TREE_THIS_VOLATILE (var) = 1;
      TREE_SIDE_EFFECTS (var) = 1;
    }

  if (DECL_NAME (base) && !DECL_IGNORED_P (base))
    {
      char *pretty_name = build_element_name (elt);
      DECL_NAME (var) = get_identifier (pretty_name);
      obstack_free (&sra_obstack, pretty_name);

      SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
      DECL_DEBUG_EXPR_IS_FROM (var) = 1;

      DECL_IGNORED_P (var) = 0;
      TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
    }
  else
    {
      DECL_IGNORED_P (var) = 1;
      /* ??? We can't generate any warning that would be meaningful.  */
      TREE_NO_WARNING (var) = 1;
    }

  if (dump_file)
    {
      fputs ("  ", dump_file);
      dump_sra_elt_name (dump_file, elt);
      fputs (" -> ", dump_file);
      print_generic_expr (dump_file, var, dump_flags);
      fputc ('\n', dump_file);
    }
}

/* Make one pass across an element tree deciding whether or not it's
   profitable to instantiate individual leaf scalars.

   PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
   fields all the way up the tree.  */
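
/* For illustration only, a worked instance of the test below: if leaf
   "a.x" has n_uses == 2 and n_copies == 0, and its ancestors accumulate
   parent_uses == 1 and parent_copies == 1, then 2 + 0 + 1 > 1 holds and
   "a.x" is instantiated; had "a" instead been used whole four times
   (parent_uses == 4), the leaf would be left alone.  */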

static void
decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
			unsigned int parent_copies)
{
  if (dump_file && !elt->parent)
    {
      fputs ("Initial instantiation for ", dump_file);
      dump_sra_elt_name (dump_file, elt);
      fputc ('\n', dump_file);
    }

  if (elt->cannot_scalarize)
    return;

  if (elt->is_scalar)
    {
      /* The decision is simple: instantiate if we're used more frequently
	 than the parent needs to be seen as a complete unit.  */
      if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
	instantiate_element (elt);
    }
  else
    {
      struct sra_elt *c, *group;
      unsigned int this_uses = elt->n_uses + parent_uses;
      unsigned int this_copies = elt->n_copies + parent_copies;

      /* Consider groups of sub-elements as weighing in favour of
	 instantiation whatever their size.  */
      for (group = elt->groups; group ; group = group->sibling)
	FOR_EACH_ACTUAL_CHILD (c, group)
	  {
	    c->n_uses += group->n_uses;
	    c->n_copies += group->n_copies;
	  }

      for (c = elt->children; c ; c = c->sibling)
	decide_instantiation_1 (c, this_uses, this_copies);
    }
}

/* Compute the size and number of all instantiated elements below ELT.
   We will only care about this if the size of the complete structure
   fits in a HOST_WIDE_INT, so we don't have to worry about overflow.  */

static unsigned int
sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
{
  if (elt->replacement)
    {
      *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
      return 1;
    }
  else
    {
      struct sra_elt *c;
      unsigned int count = 0;

      for (c = elt->children; c ; c = c->sibling)
	count += sum_instantiated_sizes (c, sizep);

      return count;
    }
}

/* Instantiate fields in ELT->TYPE that are not currently present as
   children of ELT.  */

static void instantiate_missing_elements (struct sra_elt *elt);

static void
instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
{
  struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
  if (sub->is_scalar)
    {
      if (sub->replacement == NULL)
	instantiate_element (sub);
    }
  else
    instantiate_missing_elements (sub);
}

static void
instantiate_missing_elements (struct sra_elt *elt)
{
  tree type = elt->type;

  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	tree f;
	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
	  if (TREE_CODE (f) == FIELD_DECL)
	    {
	      tree field_type = TREE_TYPE (f);

	      /* canonicalize_component_ref() unwidens some bit-field
		 types (not marked as DECL_BIT_FIELD in C++), so we
		 must do the same, lest we introduce type
		 mismatches.  */
	      if (INTEGRAL_TYPE_P (field_type)
		  && DECL_MODE (f) != TYPE_MODE (field_type))
		field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
							       field_type,
							       elt->element,
							       f, NULL_TREE),
						       NULL_TREE));

	      instantiate_missing_elements_1 (elt, f, field_type);
	    }
	break;
      }

    case ARRAY_TYPE:
      {
	tree i, max, subtype;

	i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
	max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
	subtype = TREE_TYPE (type);

	while (1)
	  {
	    instantiate_missing_elements_1 (elt, i, subtype);
	    if (tree_int_cst_equal (i, max))
	      break;
	    i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
	  }

	break;
      }

    case COMPLEX_TYPE:
      type = TREE_TYPE (type);
      instantiate_missing_elements_1 (elt, integer_zero_node, type);
      instantiate_missing_elements_1 (elt, integer_one_node, type);
      break;

    default:
      gcc_unreachable ();
    }
}

/* Make one pass across an element tree deciding whether to perform block
   or element copies.  If we decide on element copies, instantiate all
   elements.  Return true if there are any instantiated sub-elements.  */
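
/* For illustration only, a worked instance of the size heuristic used
   below: on a hypothetical 32-bit target with MOVE_RATIO == 4, the
   defaults become max_size == 16 bytes and max_count == 4, so a 12-byte
   three-field struct that is copied more often than it is used whole
   would be split into element copies, while a 64-byte struct would be
   left to block copy unless enough of its fields were already
   instantiated (the SRA_FIELD_STRUCTURE_RATIO test).  */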

static bool
decide_block_copy (struct sra_elt *elt)
{
  struct sra_elt *c;
  bool any_inst;

  /* We shouldn't be invoked on groups of sub-elements as they must
     behave like their parent as far as block copy is concerned.  */
  gcc_assert (!elt->is_group);

  /* If scalarization is disabled, respect it.  */
  if (elt->cannot_scalarize)
    {
      elt->use_block_copy = 1;

      if (dump_file)
	{
	  fputs ("Scalarization disabled for ", dump_file);
	  dump_sra_elt_name (dump_file, elt);
	  fputc ('\n', dump_file);
	}

      /* Disable scalarization of sub-elements */
      for (c = elt->children; c; c = c->sibling)
	{
	  c->cannot_scalarize = 1;
	  decide_block_copy (c);
	}

      /* Groups behave like their parent.  */
      for (c = elt->groups; c; c = c->sibling)
	{
	  c->cannot_scalarize = 1;
	  c->use_block_copy = 1;
	}

      return false;
    }

  /* Don't decide if we've no uses.  */
  if (elt->n_uses == 0 && elt->n_copies == 0)
    ;

  else if (!elt->is_scalar)
    {
      tree size_tree = TYPE_SIZE_UNIT (elt->type);
      bool use_block_copy = true;

      /* Tradeoffs for COMPLEX types pretty much always make it better
	 to go ahead and split the components.  */
      if (TREE_CODE (elt->type) == COMPLEX_TYPE)
	use_block_copy = false;

      /* Don't bother trying to figure out the rest if the structure is
	 so large we can't do easy arithmetic.  This also forces block
	 copies for variable sized structures.  */
      else if (host_integerp (size_tree, 1))
	{
	  unsigned HOST_WIDE_INT full_size, inst_size = 0;
	  unsigned int max_size, max_count, inst_count, full_count;

	  /* If the sra-max-structure-size parameter is 0, then the
	     user has not overridden the parameter and we can choose a
	     sensible default.  */
	  max_size = SRA_MAX_STRUCTURE_SIZE
	    ? SRA_MAX_STRUCTURE_SIZE
	    : MOVE_RATIO * UNITS_PER_WORD;
	  max_count = SRA_MAX_STRUCTURE_COUNT
	    ? SRA_MAX_STRUCTURE_COUNT
	    : MOVE_RATIO;

	  full_size = tree_low_cst (size_tree, 1);
	  full_count = count_type_elements (elt->type, false);
	  inst_count = sum_instantiated_sizes (elt, &inst_size);

	  /* ??? What to do here.  If there are two fields, and we've only
	     instantiated one, then instantiating the other is clearly a win.
	     If there are a large number of fields then the size of the copy
	     is much more of a factor.  */

	  /* If the structure is small, and we've made copies, go ahead
	     and instantiate, hoping that the copies will go away.  */
	  if (full_size <= max_size
	      && (full_count - inst_count) <= max_count
	      && elt->n_copies > elt->n_uses)
	    use_block_copy = false;
	  else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
		   && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
	    use_block_copy = false;

	  /* In order to avoid block copy, we have to be able to instantiate
	     all elements of the type.  See if this is possible.  */
	  if (!use_block_copy
	      && (!can_completely_scalarize_p (elt)
		  || !type_can_instantiate_all_elements (elt->type)))
	    use_block_copy = true;
	}

      elt->use_block_copy = use_block_copy;

      /* Groups behave like their parent.  */
      for (c = elt->groups; c; c = c->sibling)
	c->use_block_copy = use_block_copy;

      if (dump_file)
	{
	  fprintf (dump_file, "Using %s for ",
		   use_block_copy ? "block-copy" : "element-copy");
	  dump_sra_elt_name (dump_file, elt);
	  fputc ('\n', dump_file);
	}

      if (!use_block_copy)
	{
	  instantiate_missing_elements (elt);
	  return true;
	}
    }

  any_inst = elt->replacement != NULL;

  for (c = elt->children; c ; c = c->sibling)
    any_inst |= decide_block_copy (c);

  return any_inst;
}

/* Entry point to phase 3.  Instantiate scalar replacement variables.  */

static void
decide_instantiations (void)
{
  unsigned int i;
  bool cleared_any;
  bitmap_head done_head;
  bitmap_iterator bi;

  /* We cannot clear bits from a bitmap we're iterating over,
     so save up all the bits to clear until the end.  */
  bitmap_initialize (&done_head, &bitmap_default_obstack);
  cleared_any = false;

  EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      if (elt)
	{
	  decide_instantiation_1 (elt, 0, 0);
	  if (!decide_block_copy (elt))
	    elt = NULL;
	}
      if (!elt)
	{
	  bitmap_set_bit (&done_head, i);
	  cleared_any = true;
	}
    }

  if (cleared_any)
    {
      bitmap_and_compl_into (sra_candidates, &done_head);
      bitmap_and_compl_into (needs_copy_in, &done_head);
    }
  bitmap_clear (&done_head);

  if (!bitmap_empty_p (sra_candidates))
    todoflags |= TODO_update_smt_usage;

  mark_set_for_renaming (sra_candidates);

  if (dump_file)
    fputc ('\n', dump_file);
}


/* Phase Four: Update the function to match the replacements created.  */

/* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for
   renaming.  This becomes necessary when we modify all of a non-scalar.  */

static void
mark_all_v_defs_1 (tree stmt)
{
  tree sym;
  ssa_op_iter iter;

  update_stmt_if_modified (stmt);

  FOR_EACH_SSA_TREE_OPERAND (sym, stmt, iter, SSA_OP_ALL_VIRTUALS)
    {
      if (TREE_CODE (sym) == SSA_NAME)
	sym = SSA_NAME_VAR (sym);
      mark_sym_for_renaming (sym);
    }
}


/* Mark all the variables in virtual operands in all the statements in
   LIST for renaming.  */

static void
mark_all_v_defs (tree list)
{
  if (TREE_CODE (list) != STATEMENT_LIST)
    mark_all_v_defs_1 (list);
  else
    {
      tree_stmt_iterator i;
      for (i = tsi_start (list); !tsi_end_p (i); tsi_next (&i))
	mark_all_v_defs_1 (tsi_stmt (i));
    }
}

/* Mark every replacement under ELT with TREE_NO_WARNING.  */

static void
mark_no_warning (struct sra_elt *elt)
{
  if (!elt->all_no_warning)
    {
      if (elt->replacement)
	TREE_NO_WARNING (elt->replacement) = 1;
      else
	{
	  struct sra_elt *c;
	  FOR_EACH_ACTUAL_CHILD (c, elt)
	    mark_no_warning (c);
	}
      elt->all_no_warning = true;
    }
}

/* Build a single level component reference to ELT rooted at BASE.  */

static tree
generate_one_element_ref (struct sra_elt *elt, tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case RECORD_TYPE:
      {
	tree field = elt->element;

	/* Watch out for compatible records with differing field lists.  */
	if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
	  field = find_compatible_field (TREE_TYPE (base), field);

	return build3 (COMPONENT_REF, elt->type, base, field, NULL);
      }

    case ARRAY_TYPE:
      todoflags |= TODO_update_smt_usage;
      if (TREE_CODE (elt->element) == RANGE_EXPR)
	return build4 (ARRAY_RANGE_REF, elt->type, base,
		       TREE_OPERAND (elt->element, 0), NULL, NULL);
      else
	return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);

    case COMPLEX_TYPE:
      if (elt->element == integer_zero_node)
	return build1 (REALPART_EXPR, elt->type, base);
      else
	return build1 (IMAGPART_EXPR, elt->type, base);

    default:
      gcc_unreachable ();
    }
}

/* Build a full component reference to ELT rooted at its native variable.  */

static tree
generate_element_ref (struct sra_elt *elt)
{
  if (elt->parent)
    return generate_one_element_ref (elt, generate_element_ref (elt->parent));
  else
    return elt->element;
}

static tree
sra_build_assignment (tree dst, tree src)
{
  /* We need TYPE_CANONICAL to compare the types of dst and src
     efficiently, but that's only introduced in GCC 4.3.  */
  return build2 (MODIFY_EXPR, void_type_node, dst, src);
}

/* Generate a set of assignment statements in *LIST_P to copy all
   instantiated elements under ELT to or from the equivalent structure
   rooted at EXPR.  COPY_OUT controls the direction of the copy, with
   true meaning to copy out of EXPR into ELT.  */
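
/* For illustration only: for a candidate parameter "p" of a hypothetical
   two-field struct type, a copy-in at function entry (COPY_OUT true)
   would emit

	p$x = p.x;
	p$y = p.y;

   while the opposite direction emits the mirror-image assignments.  */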

static void
generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
		     tree *list_p)
{
  struct sra_elt *c;
  tree t;

  if (!copy_out && TREE_CODE (expr) == SSA_NAME
      && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE)
    {
      tree r, i;

      c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT);
      r = c->replacement;
      c = lookup_element (elt, integer_one_node, NULL, NO_INSERT);
      i = c->replacement;

      t = build2 (COMPLEX_EXPR, elt->type, r, i);
      t = sra_build_assignment (expr, t);
      SSA_NAME_DEF_STMT (expr) = t;
      append_to_statement_list (t, list_p);
    }
  else if (elt->replacement)
    {
      if (copy_out)
	t = sra_build_assignment (elt->replacement, expr);
      else
	t = sra_build_assignment (expr, elt->replacement);
      append_to_statement_list (t, list_p);
    }
  else
    {
      FOR_EACH_ACTUAL_CHILD (c, elt)
	{
	  t = generate_one_element_ref (c, unshare_expr (expr));
	  generate_copy_inout (c, copy_out, t, list_p);
	}
    }
}

/* Generate a set of assignment statements in *LIST_P to copy all instantiated
   elements under SRC to their counterparts under DST.  There must be a 1-1
   correspondence of instantiated elements.  */

static void
generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
{
  struct sra_elt *dc, *sc;

  FOR_EACH_ACTUAL_CHILD (dc, dst)
    {
      sc = lookup_element (src, dc->element, NULL, NO_INSERT);
      gcc_assert (sc);
      generate_element_copy (dc, sc, list_p);
    }

  if (dst->replacement)
    {
      tree t;

      gcc_assert (src->replacement);

      t = sra_build_assignment (dst->replacement, src->replacement);
      append_to_statement_list (t, list_p);
    }
}

/* Generate a set of assignment statements in *LIST_P to zero all instantiated
   elements under ELT.  In addition, do not assign to elements that have been
   marked VISITED but do reset the visited flag; this allows easy coordination
   with generate_element_init.  */

static void
generate_element_zero (struct sra_elt *elt, tree *list_p)
{
  struct sra_elt *c;

  if (elt->visited)
    {
      elt->visited = false;
      return;
    }

  FOR_EACH_ACTUAL_CHILD (c, elt)
    generate_element_zero (c, list_p);

  if (elt->replacement)
    {
      tree t;

      gcc_assert (elt->is_scalar);
      t = fold_convert (elt->type, integer_zero_node);

      t = sra_build_assignment (elt->replacement, t);
      append_to_statement_list (t, list_p);
    }
}
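
/* For illustration only: when an aggregate is initialized from a
   CONSTRUCTOR that mentions only some fields, generate_element_init
   below marks the mentioned elements VISITED; a following call to
   generate_element_zero then assigns zero only to the elements that
   were not explicitly initialized, and resets the flag.  */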
1767
1768/* Generate an assignment VAR = INIT, where INIT may need gimplification.
1769   Add the result to *LIST_P.  */
1770
1771static void
1772generate_one_element_init (tree var, tree init, tree *list_p)
1773{
1774  /* The replacement can be almost arbitrarily complex.  Gimplify.  */
1775  tree stmt = sra_build_assignment (var, init);
1776  gimplify_and_add (stmt, list_p);
1777}
1778
1779/* Generate a set of assignment statements in *LIST_P to set all instantiated
1780   elements under ELT with the contents of the initializer INIT.  In addition,
1781   mark all assigned elements VISITED; this allows easy coordination with
1782   generate_element_zero.  Return false if we found a case we couldn't
1783   handle.  */

static bool
generate_element_init_1 (struct sra_elt *elt, tree init, tree *list_p)
{
  bool result = true;
  enum tree_code init_code;
  struct sra_elt *sub;
  tree t;
  unsigned HOST_WIDE_INT idx;
  tree value, purpose;

  /* We can be passed DECL_INITIAL of a static variable.  It might have a
     conversion, which we strip off here.  */
  STRIP_USELESS_TYPE_CONVERSION (init);
  init_code = TREE_CODE (init);

  if (elt->is_scalar)
    {
      if (elt->replacement)
	{
	  generate_one_element_init (elt->replacement, init, list_p);
	  elt->visited = true;
	}
      return result;
    }

  switch (init_code)
    {
    case COMPLEX_CST:
    case COMPLEX_EXPR:
      FOR_EACH_ACTUAL_CHILD (sub, elt)
	{
	  if (sub->element == integer_zero_node)
	    t = (init_code == COMPLEX_EXPR
		 ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
	  else
	    t = (init_code == COMPLEX_EXPR
		 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
	  result &= generate_element_init_1 (sub, t, list_p);
	}
      break;

    case CONSTRUCTOR:
      FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value)
	{
	  if (TREE_CODE (purpose) == RANGE_EXPR)
	    {
	      tree lower = TREE_OPERAND (purpose, 0);
	      tree upper = TREE_OPERAND (purpose, 1);

	      while (1)
		{
		  sub = lookup_element (elt, lower, NULL, NO_INSERT);
		  if (sub != NULL)
		    result &= generate_element_init_1 (sub, value, list_p);
		  if (tree_int_cst_equal (lower, upper))
		    break;
		  lower = int_const_binop (PLUS_EXPR, lower,
					   integer_one_node, true);
		}
	    }
	  else
	    {
	      sub = lookup_element (elt, purpose, NULL, NO_INSERT);
	      if (sub != NULL)
		result &= generate_element_init_1 (sub, value, list_p);
	    }
	}
      break;

    default:
      elt->visited = true;
      result = false;
    }

  return result;
}

/* A wrapper function for generate_element_init_1 that handles cleanup after
   gimplification.  */

static bool
generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
{
  bool ret;

  push_gimplify_context ();
  ret = generate_element_init_1 (elt, init, list_p);
  pop_gimplify_context (NULL);

  /* The replacement can expose previously unreferenced variables.  */
  if (ret && *list_p)
    {
      tree_stmt_iterator i;

      for (i = tsi_start (*list_p); !tsi_end_p (i); tsi_next (&i))
	find_new_referenced_vars (tsi_stmt_ptr (i));
    }

  return ret;
}

/* Insert STMT on each of BB's outgoing edges.  If BB has more than one
   outgoing edge, STMT is replicated once per edge.  Abnormal edges are
   ignored.  */
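/* A typical use (a sketch of the call made by scalarize_parms below):

     insert_edge_copies (list, ENTRY_BLOCK_PTR);

   queues the parameter copy-in statements on the entry block's
   outgoing edges; they are committed later by bsi_commit_edge_inserts
   in scalarize_function.  */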

void
insert_edge_copies (tree stmt, basic_block bb)
{
  edge e;
  edge_iterator ei;
  bool first_copy;

  first_copy = true;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* We don't need to insert copies on abnormal edges.  The
	 value of the scalar replacement is not guaranteed to
	 be valid through an abnormal edge.  */
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  if (first_copy)
	    {
	      bsi_insert_on_edge (e, stmt);
	      first_copy = false;
	    }
	  else
	    bsi_insert_on_edge (e, unsave_expr_now (stmt));
	}
    }
}

/* Helper function to insert LIST before BSI, and set up line number info.  */

void
sra_insert_before (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
  bsi_insert_before (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but insert after BSI.  Handles insertion onto edges as well.  */

void
sra_insert_after (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));

  if (stmt_ends_bb_p (stmt))
    insert_edge_copies (list, bsi->bb);
  else
    bsi_insert_after (bsi, list, BSI_SAME_STMT);
}

/* Similarly, but replace the statement at BSI.  */

static void
sra_replace (block_stmt_iterator *bsi, tree list)
{
  sra_insert_before (bsi, list);
  bsi_remove (bsi, false);
  if (bsi_end_p (*bsi))
    *bsi = bsi_last (bsi->bb);
  else
    bsi_prev (bsi);
}

/* Scalarize a USE.  To recap, this is either a simple reference to ELT,
   if ELT is scalar, or some occurrence of ELT that requires a complete
   aggregate.  IS_OUTPUT is true if ELT is being modified; USE_ALL is
   true if the reference covers the whole of ELT.  */
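/* For illustration (hypothetical GIMPLE, with replacements a$x and
   a$y): if a fully-instantiated "a" is read as a complete unit, e.g.
   passed to a call,

     foo (a);

   becomes

     a.x = a$x;
     a.y = a$y;
     foo (a);

   flushing the shadow scalars back before the aggregate use.  For an
   output use, the copies run in the other direction, after the
   statement.  */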

static void
scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
	       bool is_output, bool use_all)
{
  tree list = NULL, stmt = bsi_stmt (*bsi);

  if (elt->replacement)
    {
      /* If we have a replacement, then updating the reference is as
	 simple as modifying the existing statement in place.  */
      if (is_output)
	mark_all_v_defs (stmt);
      *expr_p = elt->replacement;
      update_stmt (stmt);
    }
  else
    {
      /* Otherwise we need some copies.  If ELT is being read, then we want
	 to store all (modified) sub-elements back into the structure before
	 the reference takes place.  If ELT is being written, then we want to
	 load the changed values back into our shadow variables.  */
      /* ??? We don't check modified for reads, we just always write all of
	 the values.  We should be able to record the SSA number of the VOP
	 for which the values were last read.  If that number matches the
	 SSA number of the VOP in the current statement, then we needn't
	 emit an assignment.  This would also eliminate double writes when
	 a structure is passed as more than one argument to a function call.
	 This optimization would be most effective if sra_walk_function
	 processed the blocks in dominator order.  */

      generate_copy_inout (elt, is_output, generate_element_ref (elt), &list);
      if (list == NULL)
	return;
      mark_all_v_defs (list);
      if (is_output)
	sra_insert_after (bsi, list);
      else
	{
	  sra_insert_before (bsi, list);
	  if (use_all)
	    mark_no_warning (elt);
	}
    }
}

/* Scalarize a COPY.  To recap, this is an assignment statement between
   two scalarizable references, LHS_ELT and RHS_ELT.  */
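/* For illustration (hypothetical GIMPLE): when both sides are fully
   instantiated,

     a = b;

   is replaced by the pair-wise element assignments

     a$x = b$x;
     a$y = b$y;

   whereas if either side requires a block copy, the original
   assignment is left in place and copies are inserted around it.  */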

static void
scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		block_stmt_iterator *bsi)
{
  tree list, stmt;

  if (lhs_elt->replacement && rhs_elt->replacement)
    {
      /* If we have two scalar operands, modify the existing statement.  */
      stmt = bsi_stmt (*bsi);

      /* See the commentary in sra_walk_function concerning
	 RETURN_EXPR, and why we should never see one here.  */
      gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);

      TREE_OPERAND (stmt, 0) = lhs_elt->replacement;
      TREE_OPERAND (stmt, 1) = rhs_elt->replacement;
      update_stmt (stmt);
    }
  else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
    {
      /* If either side requires a block copy, then sync the RHS back
	 to the original structure, leave the original assignment
	 statement (which will perform the block copy), then load the
	 LHS values out of its now-updated original structure.  */
      /* ??? Could perform a modified pair-wise element copy.  That
	 would at least allow those elements that are instantiated in
	 both structures to be optimized well.  */

      list = NULL;
      generate_copy_inout (rhs_elt, false,
			   generate_element_ref (rhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_before (bsi, list);
	}

      list = NULL;
      generate_copy_inout (lhs_elt, true,
			   generate_element_ref (lhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* Otherwise both sides must be fully instantiated.  In that case,
	 perform pair-wise element assignments and replace the original
	 block copy statement.  */

      stmt = bsi_stmt (*bsi);
      mark_all_v_defs (stmt);

      list = NULL;
      generate_element_copy (lhs_elt, rhs_elt, &list);
      gcc_assert (list);
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}

/* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
   reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
   COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
   CONSTRUCTOR.  */
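/* For illustration (hypothetical GIMPLE): for a fully instantiated
   "a" with fields x and y,

     a = (struct s) { .x = 5 };

   becomes

     a$x = 5;
     a$y = 0;

   with the unmentioned member zeroed, as CONSTRUCTOR semantics
   require.  */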

static void
scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
{
  bool result = true;
  tree list = NULL;

  /* Generate initialization statements for all members extant in the RHS.  */
  if (rhs)
    {
      /* Unshare the expression just in case this is from a decl's initial.  */
      rhs = unshare_expr (rhs);
      result = generate_element_init (lhs_elt, rhs, &list);
    }

  /* CONSTRUCTOR is defined such that any member not mentioned is assigned
     a zero value.  Initialize the rest of the instantiated elements.  */
  generate_element_zero (lhs_elt, &list);

  if (!result)
    {
      /* If we failed to convert the entire initializer, then we must
	 leave the structure assignment in place and must load values
	 from the structure into the slots for which we did not find
	 constants.  The easiest way to do this is to generate a complete
	 copy-out, and then follow that with the constant assignments
	 that we were able to build.  DCE will clean things up.  */
      tree list0 = NULL;
      generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
			   &list0);
      append_to_statement_list (list, &list0);
      list = list0;
    }

  if (lhs_elt->use_block_copy || !result)
    {
      /* Since LHS is not fully instantiated, we must leave the structure
	 assignment in place.  Treating this case differently from a USE
	 exposes constants to later optimizations.  */
      if (list)
	{
	  mark_all_v_defs (list);
	  sra_insert_after (bsi, list);
	}
    }
  else
    {
      /* The LHS is fully instantiated.  The list of initializations
	 replaces the original structure assignment.  */
      gcc_assert (list);
      mark_all_v_defs (bsi_stmt (*bsi));
      mark_all_v_defs (list);
      sra_replace (bsi, list);
    }
}

/* A subroutine of scalarize_ldst called via walk_tree.  Set
   TREE_THIS_NOTRAP on all INDIRECT_REFs.  */

static tree
mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;

  if (TREE_CODE (t) == INDIRECT_REF)
    {
      TREE_THIS_NOTRAP (t) = 1;
      *walk_subtrees = 0;
    }
  else if (IS_TYPE_OR_DECL_P (t))
    *walk_subtrees = 0;

  return NULL;
}

/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */
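/* For illustration (hypothetical GIMPLE): with "a" fully instantiated
   and OTHER the non-scalarizable reference *p,

     a = *p;

   is replaced by direct element loads

     a$x = p->x;
     a$y = p->y;

   avoiding the block copy entirely.  */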

static void
scalarize_ldst (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
	 block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output, false);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
	 case we can have each element stored/loaded directly to/from the
	 corresponding slot in OTHER.  This avoids a block copy.  */

      tree list = NULL, stmt = bsi_stmt (*bsi);

      mark_all_v_defs (stmt);
      generate_copy_inout (elt, is_output, other, &list);
      gcc_assert (list);
      mark_all_v_defs (list);

      /* Preserve EH semantics.  */
      if (stmt_ends_bb_p (stmt))
	{
	  tree_stmt_iterator tsi;
	  tree first;

	  /* Extract the first statement from LIST.  */
	  tsi = tsi_start (list);
	  first = tsi_stmt (tsi);
	  tsi_delink (&tsi);

	  /* Replace the old statement with this new representative.  */
	  bsi_replace (bsi, first, true);

	  if (!tsi_end_p (tsi))
	    {
	      /* If any reference would trap, then they all would.  And more
		 to the point, the first would.  Therefore none of the rest
		 will trap since the first didn't.  Indicate this by
		 iterating over the remaining statements and setting
		 TREE_THIS_NOTRAP in all INDIRECT_REFs.  */
	      do
		{
		  walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
		  tsi_next (&tsi);
		}
	      while (!tsi_end_p (tsi));

	      insert_edge_copies (list, bsi->bb);
	    }
	}
      else
	sra_replace (bsi, list);
    }
}

/* Generate initializations for all scalarizable parameters.  */
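/* For illustration (hypothetical GIMPLE): if PARM_DECL "a" was
   scalarized, the entry edge receives the copy-in

     a$x = a.x;
     a$y = a.y;

   so the replacements start out holding the incoming parameter
   values.  */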

static void
scalarize_parms (void)
{
  tree list = NULL;
  unsigned i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      generate_copy_inout (elt, true, var, &list);
    }

  if (list)
    {
      insert_edge_copies (list, ENTRY_BLOCK_PTR);
      mark_all_v_defs (list);
    }
}

/* Entry point to phase 4.  Update the function to match replacements.  */

static void
scalarize_function (void)
{
  static const struct sra_walk_fns fns = {
    scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
  };

  sra_walk_function (&fns);
  scalarize_parms ();
  bsi_commit_edge_inserts ();
}


/* Debug helper function.  Print ELT in a nice human-readable format.  */

static void
dump_sra_elt_name (FILE *f, struct sra_elt *elt)
{
  if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
    {
      fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
      dump_sra_elt_name (f, elt->parent);
    }
  else
    {
      if (elt->parent)
        dump_sra_elt_name (f, elt->parent);
      if (DECL_P (elt->element))
	{
	  if (TREE_CODE (elt->element) == FIELD_DECL)
	    fputc ('.', f);
	  print_generic_expr (f, elt->element, dump_flags);
	}
      else if (TREE_CODE (elt->element) == RANGE_EXPR)
	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC ".." HOST_WIDE_INT_PRINT_DEC "]",
		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)),
		 TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1)));
      else
	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
		 TREE_INT_CST_LOW (elt->element));
    }
}

/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}

/* Allocate the type decomposition and instantiation caches, if this
   has not been done already.  */

void
sra_init_cache (void)
{
  if (sra_type_decomp_cache)
    return;

  sra_type_decomp_cache = BITMAP_ALLOC (NULL);
  sra_type_inst_cache = BITMAP_ALLOC (NULL);
}

/* Main entry point.  */

static unsigned int
tree_sra (void)
{
  /* Initialize local variables.  */
  todoflags = 0;
  gcc_obstack_init (&sra_obstack);
  sra_candidates = BITMAP_ALLOC (NULL);
  needs_copy_in = BITMAP_ALLOC (NULL);
  sra_init_cache ();
  sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);

  /* Scan.  If we find anything, instantiate and scalarize.  */
  if (find_candidates_for_sra ())
    {
      scan_function ();
      decide_instantiations ();
      scalarize_function ();
    }

  /* Free allocated memory.  */
  htab_delete (sra_map);
  sra_map = NULL;
  BITMAP_FREE (sra_candidates);
  BITMAP_FREE (needs_copy_in);
  BITMAP_FREE (sra_type_decomp_cache);
  BITMAP_FREE (sra_type_inst_cache);
  obstack_free (&sra_obstack, NULL);
  return todoflags;
}

static bool
gate_sra (void)
{
  return flag_tree_sra != 0;
}

struct tree_opt_pass pass_sra =
{
  "sra",				/* name */
  gate_sra,				/* gate */
  tree_sra,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_SRA,				/* tv_id */
  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
  0,					/* properties_provided */
  PROP_smt_usage,			/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa
    | TODO_ggc_collect | TODO_verify_ssa,	/* todo_flags_finish */
  0					/* letter */
};