1/* Decompose multiword subregs.
2   Copyright (C) 2007-2015 Free Software Foundation, Inc.
3   Contributed by Richard Henderson <rth@redhat.com>
4		  Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "machmode.h"
26#include "tm.h"
27#include "hash-set.h"
28#include "vec.h"
29#include "double-int.h"
30#include "input.h"
31#include "alias.h"
32#include "symtab.h"
33#include "wide-int.h"
34#include "inchash.h"
35#include "tree.h"
36#include "rtl.h"
37#include "tm_p.h"
38#include "flags.h"
39#include "insn-config.h"
40#include "obstack.h"
41#include "predict.h"
42#include "hard-reg-set.h"
43#include "function.h"
44#include "dominance.h"
45#include "cfg.h"
46#include "cfgrtl.h"
47#include "cfgbuild.h"
48#include "basic-block.h"
49#include "recog.h"
50#include "bitmap.h"
51#include "dce.h"
52#include "hashtab.h"
53#include "statistics.h"
54#include "real.h"
55#include "fixed-value.h"
56#include "expmed.h"
57#include "dojump.h"
58#include "explow.h"
59#include "calls.h"
60#include "emit-rtl.h"
61#include "varasm.h"
62#include "stmt.h"
63#include "expr.h"
64#include "except.h"
65#include "regs.h"
66#include "tree-pass.h"
67#include "df.h"
68#include "lower-subreg.h"
69#include "rtl-iter.h"
70
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined and
   expands to 1 or 0.  That lets it be used in ordinary expressions
   rather than only in #ifdef tests.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif
77
78
79/* Decompose multi-word pseudo-registers into individual
80   pseudo-registers when possible and profitable.  This is possible
81   when all the uses of a multi-word register are via SUBREG, or are
82   copies of the register to another location.  Breaking apart the
83   register permits more CSE and permits better register allocation.
84   This is profitable if the machine does not have move instructions
85   to do this.
86
87   This pass only splits moves with modes that are wider than
88   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
89   integer modes that are twice the width of word_mode.  The latter
90   could be generalized if there was a need to do this, but the trend in
91   architectures is to not need this.
92
93   There are two useful preprocessor defines for use by maintainers:
94
95   #define LOG_COSTS 1
96
97   if you wish to see the actual cost estimates that are being used
98   for each mode wider than word mode and the cost estimates for zero
99   extension and the shifts.   This can be useful when port maintainers
100   are tuning insn rtx costs.
101
102   #define FORCE_LOWERING 1
103
104   if you wish to test the pass with all the transformation forced on.
105   This can be useful for finding bugs in the transformations.  */
106
/* Maintainer switches; both default to off.  Each is defined here only
   when it has not been defined already, so it can also be turned on
   from the compiler command line (for example -DLOG_COSTS=1) without
   editing this file.  */
#ifndef LOG_COSTS
#define LOG_COSTS 0
#endif
#ifndef FORCE_LOWERING
#define FORCE_LOWERING 0
#endif
109
110/* Bit N in this bitmap is set if regno N is used in a context in
111   which we can decompose it.  */
112static bitmap decomposable_context;
113
114/* Bit N in this bitmap is set if regno N is used in a context in
115   which it can not be decomposed.  */
116static bitmap non_decomposable_context;
117
118/* Bit N in this bitmap is set if regno N is used in a subreg
119   which changes the mode but not the size.  This typically happens
120   when the register accessed as a floating-point value; we want to
121   avoid generating accesses to its subwords in integer modes.  */
122static bitmap subreg_context;
123
124/* Bit N in the bitmap in element M of this array is set if there is a
125   copy from reg M to reg N.  */
126static vec<bitmap> reg_copy_graph;
127
128struct target_lower_subreg default_target_lower_subreg;
129#if SWITCHABLE_TARGET
130struct target_lower_subreg *this_target_lower_subreg
131  = &default_target_lower_subreg;
132#endif
133
134#define twice_word_mode \
135  this_target_lower_subreg->x_twice_word_mode
136#define choices \
137  this_target_lower_subreg->x_choices
138
139/* RTXes used while computing costs.  */
140struct cost_rtxes {
141  /* Source and target registers.  */
142  rtx source;
143  rtx target;
144
145  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
146  rtx zext;
147
148  /* A shift of SOURCE.  */
149  rtx shift;
150
151  /* A SET of TARGET.  */
152  rtx set;
153};
154
155/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
156   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
157
158static int
159shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
160	    machine_mode mode, int op1)
161{
162  PUT_CODE (rtxes->shift, code);
163  PUT_MODE (rtxes->shift, mode);
164  PUT_MODE (rtxes->source, mode);
165  XEXP (rtxes->shift, 1) = GEN_INT (op1);
166  return set_src_cost (rtxes->shift, speed_p);
167}
168
169/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
170   to true if it is profitable to split a double-word CODE shift
171   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
172   for speed or size profitability.
173
174   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
175   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
176   is the cost of moving between word registers.  */
177
178static void
179compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
180			 bool *splitting, enum rtx_code code,
181			 int word_move_zero_cost, int word_move_cost)
182{
183  int wide_cost, narrow_cost, upper_cost, i;
184
185  for (i = 0; i < BITS_PER_WORD; i++)
186    {
187      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
188			      i + BITS_PER_WORD);
189      if (i == 0)
190	narrow_cost = word_move_cost;
191      else
192	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
193
194      if (code != ASHIFTRT)
195	upper_cost = word_move_zero_cost;
196      else if (i == BITS_PER_WORD - 1)
197	upper_cost = word_move_cost;
198      else
199	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
200				 BITS_PER_WORD - 1);
201
202      if (LOG_COSTS)
203	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
204		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
205		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
206
207      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
208	splitting[i] = true;
209    }
210}
211
212/* Compute what we should do when optimizing for speed or size; SPEED_P
213   selects which.  Use RTXES for computing costs.  */
214
215static void
216compute_costs (bool speed_p, struct cost_rtxes *rtxes)
217{
218  unsigned int i;
219  int word_move_zero_cost, word_move_cost;
220
221  PUT_MODE (rtxes->target, word_mode);
222  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
223  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
224
225  SET_SRC (rtxes->set) = rtxes->source;
226  word_move_cost = set_rtx_cost (rtxes->set, speed_p);
227
228  if (LOG_COSTS)
229    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
230	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
231
232  for (i = 0; i < MAX_MACHINE_MODE; i++)
233    {
234      machine_mode mode = (machine_mode) i;
235      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
236      if (factor > 1)
237	{
238	  int mode_move_cost;
239
240	  PUT_MODE (rtxes->target, mode);
241	  PUT_MODE (rtxes->source, mode);
242	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
243
244	  if (LOG_COSTS)
245	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
246		     GET_MODE_NAME (mode), mode_move_cost,
247		     word_move_cost, factor);
248
249	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
250	    {
251	      choices[speed_p].move_modes_to_split[i] = true;
252	      choices[speed_p].something_to_do = true;
253	    }
254	}
255    }
256
257  /* For the moves and shifts, the only case that is checked is one
258     where the mode of the target is an integer mode twice the width
259     of the word_mode.
260
261     If it is not profitable to split a double word move then do not
262     even consider the shifts or the zero extension.  */
263  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
264    {
265      int zext_cost;
266
267      /* The only case here to check to see if moving the upper part with a
268	 zero is cheaper than doing the zext itself.  */
269      PUT_MODE (rtxes->source, word_mode);
270      zext_cost = set_src_cost (rtxes->zext, speed_p);
271
272      if (LOG_COSTS)
273	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
274		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
275		 zext_cost, word_move_cost, word_move_zero_cost);
276
277      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
278	choices[speed_p].splitting_zext = true;
279
280      compute_splitting_shift (speed_p, rtxes,
281			       choices[speed_p].splitting_ashift, ASHIFT,
282			       word_move_zero_cost, word_move_cost);
283      compute_splitting_shift (speed_p, rtxes,
284			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
285			       word_move_zero_cost, word_move_cost);
286      compute_splitting_shift (speed_p, rtxes,
287			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
288			       word_move_zero_cost, word_move_cost);
289    }
290}
291
292/* Do one-per-target initialisation.  This involves determining
293   which operations on the machine are profitable.  If none are found,
294   then the pass just returns when called.  */
295
296void
297init_lower_subreg (void)
298{
299  struct cost_rtxes rtxes;
300
301  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
302
303  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
304
305  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
306  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
307  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
308  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
309  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
310
311  if (LOG_COSTS)
312    fprintf (stderr, "\nSize costs\n==========\n\n");
313  compute_costs (false, &rtxes);
314
315  if (LOG_COSTS)
316    fprintf (stderr, "\nSpeed costs\n===========\n\n");
317  compute_costs (true, &rtxes);
318}
319
320static bool
321simple_move_operand (rtx x)
322{
323  if (GET_CODE (x) == SUBREG)
324    x = SUBREG_REG (x);
325
326  if (!OBJECT_P (x))
327    return false;
328
329  if (GET_CODE (x) == LABEL_REF
330      || GET_CODE (x) == SYMBOL_REF
331      || GET_CODE (x) == HIGH
332      || GET_CODE (x) == CONST)
333    return false;
334
335  if (MEM_P (x)
336      && (MEM_VOLATILE_P (x)
337	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
338    return false;
339
340  return true;
341}
342
343/* If INSN is a single set between two objects that we want to split,
344   return the single set.  SPEED_P says whether we are optimizing
345   INSN for speed or size.
346
347   INSN should have been passed to recog and extract_insn before this
348   is called.  */
349
350static rtx
351simple_move (rtx_insn *insn, bool speed_p)
352{
353  rtx x;
354  rtx set;
355  machine_mode mode;
356
357  if (recog_data.n_operands != 2)
358    return NULL_RTX;
359
360  set = single_set (insn);
361  if (!set)
362    return NULL_RTX;
363
364  x = SET_DEST (set);
365  if (x != recog_data.operand[0] && x != recog_data.operand[1])
366    return NULL_RTX;
367  if (!simple_move_operand (x))
368    return NULL_RTX;
369
370  x = SET_SRC (set);
371  if (x != recog_data.operand[0] && x != recog_data.operand[1])
372    return NULL_RTX;
373  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
374     things like x86 rdtsc which returns a DImode value.  */
375  if (GET_CODE (x) != ASM_OPERANDS
376      && !simple_move_operand (x))
377    return NULL_RTX;
378
379  /* We try to decompose in integer modes, to avoid generating
380     inefficient code copying between integer and floating point
381     registers.  That means that we can't decompose if this is a
382     non-integer mode for which there is no integer mode of the same
383     size.  */
384  mode = GET_MODE (SET_DEST (set));
385  if (!SCALAR_INT_MODE_P (mode)
386      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
387	  == BLKmode))
388    return NULL_RTX;
389
390  /* Reject PARTIAL_INT modes.  They are used for processor specific
391     purposes and it's probably best not to tamper with them.  */
392  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393    return NULL_RTX;
394
395  if (!choices[speed_p].move_modes_to_split[(int) mode])
396    return NULL_RTX;
397
398  return set;
399}
400
401/* If SET is a copy from one multi-word pseudo-register to another,
402   record that in reg_copy_graph.  Return whether it is such a
403   copy.  */
404
405static bool
406find_pseudo_copy (rtx set)
407{
408  rtx dest = SET_DEST (set);
409  rtx src = SET_SRC (set);
410  unsigned int rd, rs;
411  bitmap b;
412
413  if (!REG_P (dest) || !REG_P (src))
414    return false;
415
416  rd = REGNO (dest);
417  rs = REGNO (src);
418  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
419    return false;
420
421  b = reg_copy_graph[rs];
422  if (b == NULL)
423    {
424      b = BITMAP_ALLOC (NULL);
425      reg_copy_graph[rs] = b;
426    }
427
428  bitmap_set_bit (b, rd);
429
430  return true;
431}
432
433/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
434   where they are copied to another register, add the register to
435   which they are copied to DECOMPOSABLE_CONTEXT.  Use
436   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
437   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
438
439static void
440propagate_pseudo_copies (void)
441{
442  bitmap queue, propagate;
443
444  queue = BITMAP_ALLOC (NULL);
445  propagate = BITMAP_ALLOC (NULL);
446
447  bitmap_copy (queue, decomposable_context);
448  do
449    {
450      bitmap_iterator iter;
451      unsigned int i;
452
453      bitmap_clear (propagate);
454
455      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
456	{
457	  bitmap b = reg_copy_graph[i];
458	  if (b)
459	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
460	}
461
462      bitmap_and_compl (queue, propagate, decomposable_context);
463      bitmap_ior_into (decomposable_context, propagate);
464    }
465  while (!bitmap_empty_p (queue));
466
467  BITMAP_FREE (queue);
468  BITMAP_FREE (propagate);
469}
470
/* The kind of move an insn is.  find_decomposable_subregs receives a
   pointer to one of these values.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is some other simple move.  */
  SIMPLE_MOVE
};
483
484/* If we find a SUBREG in *LOC which we could use to decompose a
485   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
486   unadorned register which is not a simple pseudo-register copy,
487   DATA will point at the type of move, and we set a bit in
488   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */
489
490static void
491find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
492{
493  subrtx_var_iterator::array_type array;
494  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
495    {
496      rtx x = *iter;
497      if (GET_CODE (x) == SUBREG)
498	{
499	  rtx inner = SUBREG_REG (x);
500	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;
501
502	  if (!REG_P (inner))
503	    continue;
504
505	  regno = REGNO (inner);
506	  if (HARD_REGISTER_NUM_P (regno))
507	    {
508	      iter.skip_subrtxes ();
509	      continue;
510	    }
511
512	  outer_size = GET_MODE_SIZE (GET_MODE (x));
513	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
514	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
515	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
516
517	  /* We only try to decompose single word subregs of multi-word
518	     registers.  When we find one, we return -1 to avoid iterating
519	     over the inner register.
520
521	     ??? This doesn't allow, e.g., DImode subregs of TImode values
522	     on 32-bit targets.  We would need to record the way the
523	     pseudo-register was used, and only decompose if all the uses
524	     were the same number and size of pieces.  Hopefully this
525	     doesn't happen much.  */
526
527	  if (outer_words == 1 && inner_words > 1)
528	    {
529	      bitmap_set_bit (decomposable_context, regno);
530	      iter.skip_subrtxes ();
531	      continue;
532	    }
533
534	  /* If this is a cast from one mode to another, where the modes
535	     have the same size, and they are not tieable, then mark this
536	     register as non-decomposable.  If we decompose it we are
537	     likely to mess up whatever the backend is trying to do.  */
538	  if (outer_words > 1
539	      && outer_size == inner_size
540	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
541	    {
542	      bitmap_set_bit (non_decomposable_context, regno);
543	      bitmap_set_bit (subreg_context, regno);
544	      iter.skip_subrtxes ();
545	      continue;
546	    }
547	}
548      else if (REG_P (x))
549	{
550	  unsigned int regno;
551
552	  /* We will see an outer SUBREG before we see the inner REG, so
553	     when we see a plain REG here it means a direct reference to
554	     the register.
555
556	     If this is not a simple copy from one location to another,
557	     then we can not decompose this register.  If this is a simple
558	     copy we want to decompose, and the mode is right,
559	     then we mark the register as decomposable.
560	     Otherwise we don't say anything about this register --
561	     it could be decomposed, but whether that would be
562	     profitable depends upon how it is used elsewhere.
563
564	     We only set bits in the bitmap for multi-word
565	     pseudo-registers, since those are the only ones we care about
566	     and it keeps the size of the bitmaps down.  */
567
568	  regno = REGNO (x);
569	  if (!HARD_REGISTER_NUM_P (regno)
570	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
571	    {
572	      switch (*pcmi)
573		{
574		case NOT_SIMPLE_MOVE:
575		  bitmap_set_bit (non_decomposable_context, regno);
576		  break;
577		case DECOMPOSABLE_SIMPLE_MOVE:
578		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
579		    bitmap_set_bit (decomposable_context, regno);
580		  break;
581		case SIMPLE_MOVE:
582		  break;
583		default:
584		  gcc_unreachable ();
585		}
586	    }
587	}
588      else if (MEM_P (x))
589	{
590	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
591
592	  /* Any registers used in a MEM do not participate in a
593	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
594	     here, and return -1 to block the parent's recursion.  */
595	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
596	  iter.skip_subrtxes ();
597	}
598    }
599}
600
601/* Decompose REGNO into word-sized components.  We smash the REG node
602   in place.  This ensures that (1) something goes wrong quickly if we
603   fail to make some replacement, and (2) the debug information inside
604   the symbol table is automatically kept up to date.  */
605
606static void
607decompose_register (unsigned int regno)
608{
609  rtx reg;
610  unsigned int words, i;
611  rtvec v;
612
613  reg = regno_reg_rtx[regno];
614
615  regno_reg_rtx[regno] = NULL_RTX;
616
617  words = GET_MODE_SIZE (GET_MODE (reg));
618  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
619
620  v = rtvec_alloc (words);
621  for (i = 0; i < words; ++i)
622    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
623
624  PUT_CODE (reg, CONCATN);
625  XVEC (reg, 0) = v;
626
627  if (dump_file)
628    {
629      fprintf (dump_file, "; Splitting reg %u ->", regno);
630      for (i = 0; i < words; ++i)
631	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
632      fputc ('\n', dump_file);
633    }
634}
635
636/* Get a SUBREG of a CONCATN.  */
637
638static rtx
639simplify_subreg_concatn (machine_mode outermode, rtx op,
640			 unsigned int byte)
641{
642  unsigned int inner_size;
643  machine_mode innermode, partmode;
644  rtx part;
645  unsigned int final_offset;
646
647  gcc_assert (GET_CODE (op) == CONCATN);
648  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
649
650  innermode = GET_MODE (op);
651  gcc_assert (byte < GET_MODE_SIZE (innermode));
652  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
653
654  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
655  part = XVECEXP (op, 0, byte / inner_size);
656  partmode = GET_MODE (part);
657
658  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
659     regular CONST_VECTORs.  They have vector or integer modes, depending
660     on the capabilities of the target.  Cope with them.  */
661  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
662    partmode = GET_MODE_INNER (innermode);
663  else if (partmode == VOIDmode)
664    {
665      enum mode_class mclass = GET_MODE_CLASS (innermode);
666      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
667    }
668
669  final_offset = byte % inner_size;
670  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
671    return NULL_RTX;
672
673  return simplify_gen_subreg (outermode, part, partmode, final_offset);
674}
675
676/* Wrapper around simplify_gen_subreg which handles CONCATN.  */
677
678static rtx
679simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
680			     machine_mode innermode, unsigned int byte)
681{
682  rtx ret;
683
684  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
685     If OP is a SUBREG of a CONCATN, then it must be a simple mode
686     change with the same size and offset 0, or it must extract a
687     part.  We shouldn't see anything else here.  */
688  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
689    {
690      rtx op2;
691
692      if ((GET_MODE_SIZE (GET_MODE (op))
693	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
694	  && SUBREG_BYTE (op) == 0)
695	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
696					    GET_MODE (SUBREG_REG (op)), byte);
697
698      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
699				     SUBREG_BYTE (op));
700      if (op2 == NULL_RTX)
701	{
702	  /* We don't handle paradoxical subregs here.  */
703	  gcc_assert (GET_MODE_SIZE (outermode)
704		      <= GET_MODE_SIZE (GET_MODE (op)));
705	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
706		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
707	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
708					 byte + SUBREG_BYTE (op));
709	  gcc_assert (op2 != NULL_RTX);
710	  return op2;
711	}
712
713      op = op2;
714      gcc_assert (op != NULL_RTX);
715      gcc_assert (innermode == GET_MODE (op));
716    }
717
718  if (GET_CODE (op) == CONCATN)
719    return simplify_subreg_concatn (outermode, op, byte);
720
721  ret = simplify_gen_subreg (outermode, op, innermode, byte);
722
723  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
724     resolve_simple_move will ask for the high part of the paradoxical
725     subreg, which does not have a value.  Just return a zero.  */
726  if (ret == NULL_RTX
727      && GET_CODE (op) == SUBREG
728      && SUBREG_BYTE (op) == 0
729      && (GET_MODE_SIZE (innermode)
730	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
731    return CONST0_RTX (outermode);
732
733  gcc_assert (ret != NULL_RTX);
734  return ret;
735}
736
737/* Return whether we should resolve X into the registers into which it
738   was decomposed.  */
739
740static bool
741resolve_reg_p (rtx x)
742{
743  return GET_CODE (x) == CONCATN;
744}
745
746/* Return whether X is a SUBREG of a register which we need to
747   resolve.  */
748
749static bool
750resolve_subreg_p (rtx x)
751{
752  if (GET_CODE (x) != SUBREG)
753    return false;
754  return resolve_reg_p (SUBREG_REG (x));
755}
756
757/* Look for SUBREGs in *LOC which need to be decomposed.  */
758
759static bool
760resolve_subreg_use (rtx *loc, rtx insn)
761{
762  subrtx_ptr_iterator::array_type array;
763  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
764    {
765      rtx *loc = *iter;
766      rtx x = *loc;
767      if (resolve_subreg_p (x))
768	{
769	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
770				       SUBREG_BYTE (x));
771
772	  /* It is possible for a note to contain a reference which we can
773	     decompose.  In this case, return 1 to the caller to indicate
774	     that the note must be removed.  */
775	  if (!x)
776	    {
777	      gcc_assert (!insn);
778	      return true;
779	    }
780
781	  validate_change (insn, loc, x, 1);
782	  iter.skip_subrtxes ();
783	}
784      else if (resolve_reg_p (x))
785	/* Return 1 to the caller to indicate that we found a direct
786	   reference to a register which is being decomposed.  This can
787	   happen inside notes, multiword shift or zero-extend
788	   instructions.  */
789	return true;
790    }
791
792  return false;
793}
794
795/* Resolve any decomposed registers which appear in register notes on
796   INSN.  */
797
798static void
799resolve_reg_notes (rtx_insn *insn)
800{
801  rtx *pnote, note;
802
803  note = find_reg_equal_equiv_note (insn);
804  if (note)
805    {
806      int old_count = num_validated_changes ();
807      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
808	remove_note (insn, note);
809      else
810	if (old_count != num_validated_changes ())
811	  df_notes_rescan (insn);
812    }
813
814  pnote = &REG_NOTES (insn);
815  while (*pnote != NULL_RTX)
816    {
817      bool del = false;
818
819      note = *pnote;
820      switch (REG_NOTE_KIND (note))
821	{
822	case REG_DEAD:
823	case REG_UNUSED:
824	  if (resolve_reg_p (XEXP (note, 0)))
825	    del = true;
826	  break;
827
828	default:
829	  break;
830	}
831
832      if (del)
833	*pnote = XEXP (note, 1);
834      else
835	pnote = &XEXP (note, 1);
836    }
837}
838
839/* Return whether X can be decomposed into subwords.  */
840
841static bool
842can_decompose_p (rtx x)
843{
844  if (REG_P (x))
845    {
846      unsigned int regno = REGNO (x);
847
848      if (HARD_REGISTER_NUM_P (regno))
849	{
850	  unsigned int byte, num_bytes;
851
852	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
853	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
854	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
855	      return false;
856	  return true;
857	}
858      else
859	return !bitmap_bit_p (subreg_context, regno);
860    }
861
862  return true;
863}
864
865/* Decompose the registers used in a simple move SET within INSN.  If
866   we don't change anything, return INSN, otherwise return the start
867   of the sequence of moves.  */
868
869static rtx_insn *
870resolve_simple_move (rtx set, rtx_insn *insn)
871{
872  rtx src, dest, real_dest;
873  rtx_insn *insns;
874  machine_mode orig_mode, dest_mode;
875  unsigned int words;
876  bool pushing;
877
878  src = SET_SRC (set);
879  dest = SET_DEST (set);
880  orig_mode = GET_MODE (dest);
881
882  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
883  gcc_assert (words > 1);
884
885  start_sequence ();
886
887  /* We have to handle copying from a SUBREG of a decomposed reg where
888     the SUBREG is larger than word size.  Rather than assume that we
889     can take a word_mode SUBREG of the destination, we copy to a new
890     register and then copy that to the destination.  */
891
892  real_dest = NULL_RTX;
893
894  if (GET_CODE (src) == SUBREG
895      && resolve_reg_p (SUBREG_REG (src))
896      && (SUBREG_BYTE (src) != 0
897	  || (GET_MODE_SIZE (orig_mode)
898	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
899    {
900      real_dest = dest;
901      dest = gen_reg_rtx (orig_mode);
902      if (REG_P (real_dest))
903	REG_ATTRS (dest) = REG_ATTRS (real_dest);
904    }
905
906  /* Similarly if we are copying to a SUBREG of a decomposed reg where
907     the SUBREG is larger than word size.  */
908
909  if (GET_CODE (dest) == SUBREG
910      && resolve_reg_p (SUBREG_REG (dest))
911      && (SUBREG_BYTE (dest) != 0
912	  || (GET_MODE_SIZE (orig_mode)
913	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
914    {
915      rtx reg, smove;
916      rtx_insn *minsn;
917
918      reg = gen_reg_rtx (orig_mode);
919      minsn = emit_move_insn (reg, src);
920      smove = single_set (minsn);
921      gcc_assert (smove != NULL_RTX);
922      resolve_simple_move (smove, minsn);
923      src = reg;
924    }
925
926  /* If we didn't have any big SUBREGS of decomposed registers, and
927     neither side of the move is a register we are decomposing, then
928     we don't have to do anything here.  */
929
930  if (src == SET_SRC (set)
931      && dest == SET_DEST (set)
932      && !resolve_reg_p (src)
933      && !resolve_subreg_p (src)
934      && !resolve_reg_p (dest)
935      && !resolve_subreg_p (dest))
936    {
937      end_sequence ();
938      return insn;
939    }
940
941  /* It's possible for the code to use a subreg of a decomposed
942     register while forming an address.  We need to handle that before
943     passing the address to emit_move_insn.  We pass NULL_RTX as the
944     insn parameter to resolve_subreg_use because we can not validate
945     the insn yet.  */
946  if (MEM_P (src) || MEM_P (dest))
947    {
948      int acg;
949
950      if (MEM_P (src))
951	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
952      if (MEM_P (dest))
953	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
954      acg = apply_change_group ();
955      gcc_assert (acg);
956    }
957
958  /* If SRC is a register which we can't decompose, or has side
959     effects, we need to move via a temporary register.  */
960
961  if (!can_decompose_p (src)
962      || side_effects_p (src)
963      || GET_CODE (src) == ASM_OPERANDS)
964    {
965      rtx reg;
966
967      reg = gen_reg_rtx (orig_mode);
968
969#ifdef AUTO_INC_DEC
970      {
971	rtx move = emit_move_insn (reg, src);
972	if (MEM_P (src))
973	  {
974	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
975	    if (note)
976	      add_reg_note (move, REG_INC, XEXP (note, 0));
977	  }
978      }
979#else
980      emit_move_insn (reg, src);
981#endif
982      src = reg;
983    }
984
985  /* If DEST is a register which we can't decompose, or has side
986     effects, we need to first move to a temporary register.  We
987     handle the common case of pushing an operand directly.  We also
988     go through a temporary register if it holds a floating point
989     value.  This gives us better code on systems which can't move
990     data easily between integer and floating point registers.  */
991
992  dest_mode = orig_mode;
993  pushing = push_operand (dest, dest_mode);
994  if (!can_decompose_p (dest)
995      || (side_effects_p (dest) && !pushing)
996      || (!SCALAR_INT_MODE_P (dest_mode)
997	  && !resolve_reg_p (dest)
998	  && !resolve_subreg_p (dest)))
999    {
1000      if (real_dest == NULL_RTX)
1001	real_dest = dest;
1002      if (!SCALAR_INT_MODE_P (dest_mode))
1003	{
1004	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1005				     MODE_INT, 0);
1006	  gcc_assert (dest_mode != BLKmode);
1007	}
1008      dest = gen_reg_rtx (dest_mode);
1009      if (REG_P (real_dest))
1010	REG_ATTRS (dest) = REG_ATTRS (real_dest);
1011    }
1012
1013  if (pushing)
1014    {
1015      unsigned int i, j, jinc;
1016
1017      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1018      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1019      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1020
1021      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1022	{
1023	  j = 0;
1024	  jinc = 1;
1025	}
1026      else
1027	{
1028	  j = words - 1;
1029	  jinc = -1;
1030	}
1031
1032      for (i = 0; i < words; ++i, j += jinc)
1033	{
1034	  rtx temp;
1035
1036	  temp = copy_rtx (XEXP (dest, 0));
1037	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1038					       j * UNITS_PER_WORD);
1039	  emit_move_insn (temp,
1040			  simplify_gen_subreg_concatn (word_mode, src,
1041						       orig_mode,
1042						       j * UNITS_PER_WORD));
1043	}
1044    }
1045  else
1046    {
1047      unsigned int i;
1048
1049      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1050	emit_clobber (dest);
1051
1052      for (i = 0; i < words; ++i)
1053	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1054						     dest_mode,
1055						     i * UNITS_PER_WORD),
1056			simplify_gen_subreg_concatn (word_mode, src,
1057						     orig_mode,
1058						     i * UNITS_PER_WORD));
1059    }
1060
1061  if (real_dest != NULL_RTX)
1062    {
1063      rtx mdest, smove;
1064      rtx_insn *minsn;
1065
1066      if (dest_mode == orig_mode)
1067	mdest = dest;
1068      else
1069	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1070      minsn = emit_move_insn (real_dest, mdest);
1071
1072#ifdef AUTO_INC_DEC
1073  if (MEM_P (real_dest)
1074      && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1075    {
1076      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1077      if (note)
1078	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1079    }
1080#endif
1081
1082      smove = single_set (minsn);
1083      gcc_assert (smove != NULL_RTX);
1084
1085      resolve_simple_move (smove, minsn);
1086    }
1087
1088  insns = get_insns ();
1089  end_sequence ();
1090
1091  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1092
1093  emit_insn_before (insns, insn);
1094
1095  /* If we get here via self-recursion, then INSN is not yet in the insns
1096     chain and delete_insn will fail.  We only want to remove INSN from the
1097     current sequence.  See PR56738.  */
1098  if (in_sequence_p ())
1099    remove_insn (insn);
1100  else
1101    delete_insn (insn);
1102
1103  return insns;
1104}
1105
1106/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1107   component registers.  Return whether we changed something.  */
1108
1109static bool
1110resolve_clobber (rtx pat, rtx_insn *insn)
1111{
1112  rtx reg;
1113  machine_mode orig_mode;
1114  unsigned int words, i;
1115  int ret;
1116
1117  reg = XEXP (pat, 0);
1118  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1119    return false;
1120
1121  orig_mode = GET_MODE (reg);
1122  words = GET_MODE_SIZE (orig_mode);
1123  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1124
1125  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1126			 simplify_gen_subreg_concatn (word_mode, reg,
1127						      orig_mode, 0),
1128			 0);
1129  df_insn_rescan (insn);
1130  gcc_assert (ret != 0);
1131
1132  for (i = words - 1; i > 0; --i)
1133    {
1134      rtx x;
1135
1136      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1137				       i * UNITS_PER_WORD);
1138      x = gen_rtx_CLOBBER (VOIDmode, x);
1139      emit_insn_after (x, insn);
1140    }
1141
1142  resolve_reg_notes (insn);
1143
1144  return true;
1145}
1146
1147/* A USE of a decomposed register is no longer meaningful.  Return
1148   whether we changed something.  */
1149
1150static bool
1151resolve_use (rtx pat, rtx_insn *insn)
1152{
1153  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1154    {
1155      delete_insn (insn);
1156      return true;
1157    }
1158
1159  resolve_reg_notes (insn);
1160
1161  return false;
1162}
1163
1164/* A VAR_LOCATION can be simplified.  */
1165
1166static void
1167resolve_debug (rtx_insn *insn)
1168{
1169  subrtx_ptr_iterator::array_type array;
1170  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1171    {
1172      rtx *loc = *iter;
1173      rtx x = *loc;
1174      if (resolve_subreg_p (x))
1175	{
1176	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1177				       SUBREG_BYTE (x));
1178
1179	  if (x)
1180	    *loc = x;
1181	  else
1182	    x = copy_rtx (*loc);
1183	}
1184      if (resolve_reg_p (x))
1185	*loc = copy_rtx (x);
1186    }
1187
1188  df_insn_rescan (insn);
1189
1190  resolve_reg_notes (insn);
1191}
1192
1193/* Check if INSN is a decomposable multiword-shift or zero-extend and
1194   set the decomposable_context bitmap accordingly.  SPEED_P is true
1195   if we are optimizing INSN for speed rather than size.  Return true
1196   if INSN is decomposable.  */
1197
1198static bool
1199find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1200{
1201  rtx set;
1202  rtx op;
1203  rtx op_operand;
1204
1205  set = single_set (insn);
1206  if (!set)
1207    return false;
1208
1209  op = SET_SRC (set);
1210  if (GET_CODE (op) != ASHIFT
1211      && GET_CODE (op) != LSHIFTRT
1212      && GET_CODE (op) != ASHIFTRT
1213      && GET_CODE (op) != ZERO_EXTEND)
1214    return false;
1215
1216  op_operand = XEXP (op, 0);
1217  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1218      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1219      || HARD_REGISTER_NUM_P (REGNO (op_operand))
1220      || GET_MODE (op) != twice_word_mode)
1221    return false;
1222
1223  if (GET_CODE (op) == ZERO_EXTEND)
1224    {
1225      if (GET_MODE (op_operand) != word_mode
1226	  || !choices[speed_p].splitting_zext)
1227	return false;
1228    }
1229  else /* left or right shift */
1230    {
1231      bool *splitting = (GET_CODE (op) == ASHIFT
1232			 ? choices[speed_p].splitting_ashift
1233			 : GET_CODE (op) == ASHIFTRT
1234			 ? choices[speed_p].splitting_ashiftrt
1235			 : choices[speed_p].splitting_lshiftrt);
1236      if (!CONST_INT_P (XEXP (op, 1))
1237	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1238			2 * BITS_PER_WORD - 1)
1239	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1240	return false;
1241
1242      bitmap_set_bit (decomposable_context, REGNO (op_operand));
1243    }
1244
1245  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1246
1247  return true;
1248}
1249
1250/* Decompose a more than word wide shift (in INSN) of a multiword
1251   pseudo or a multiword zero-extend of a wordmode pseudo into a move
1252   and 'set to zero' insn.  Return a pointer to the new insn when a
1253   replacement was done.  */
1254
1255static rtx_insn *
1256resolve_shift_zext (rtx_insn *insn)
1257{
1258  rtx set;
1259  rtx op;
1260  rtx op_operand;
1261  rtx_insn *insns;
1262  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1263  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1264
1265  set = single_set (insn);
1266  if (!set)
1267    return NULL;
1268
1269  op = SET_SRC (set);
1270  if (GET_CODE (op) != ASHIFT
1271      && GET_CODE (op) != LSHIFTRT
1272      && GET_CODE (op) != ASHIFTRT
1273      && GET_CODE (op) != ZERO_EXTEND)
1274    return NULL;
1275
1276  op_operand = XEXP (op, 0);
1277
1278  /* We can tear this operation apart only if the regs were already
1279     torn apart.  */
1280  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1281    return NULL;
1282
1283  /* src_reg_num is the number of the word mode register which we
1284     are operating on.  For a left shift and a zero_extend on little
1285     endian machines this is register 0.  */
1286  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1287		? 1 : 0;
1288
1289  if (WORDS_BIG_ENDIAN
1290      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1291    src_reg_num = 1 - src_reg_num;
1292
1293  if (GET_CODE (op) == ZERO_EXTEND)
1294    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1295  else
1296    dest_reg_num = 1 - src_reg_num;
1297
1298  offset1 = UNITS_PER_WORD * dest_reg_num;
1299  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1300  src_offset = UNITS_PER_WORD * src_reg_num;
1301
1302  start_sequence ();
1303
1304  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1305                                          GET_MODE (SET_DEST (set)),
1306                                          offset1);
1307  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1308					    GET_MODE (SET_DEST (set)),
1309					    offset2);
1310  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1311                                         GET_MODE (op_operand),
1312                                         src_offset);
1313  if (GET_CODE (op) == ASHIFTRT
1314      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1315    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1316			      BITS_PER_WORD - 1, NULL_RTX, 0);
1317
1318  if (GET_CODE (op) != ZERO_EXTEND)
1319    {
1320      int shift_count = INTVAL (XEXP (op, 1));
1321      if (shift_count > BITS_PER_WORD)
1322	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1323				LSHIFT_EXPR : RSHIFT_EXPR,
1324				word_mode, src_reg,
1325				shift_count - BITS_PER_WORD,
1326				dest_reg, GET_CODE (op) != ASHIFTRT);
1327    }
1328
1329  if (dest_reg != src_reg)
1330    emit_move_insn (dest_reg, src_reg);
1331  if (GET_CODE (op) != ASHIFTRT)
1332    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1333  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1334    emit_move_insn (dest_upper, copy_rtx (src_reg));
1335  else
1336    emit_move_insn (dest_upper, upper_src);
1337  insns = get_insns ();
1338
1339  end_sequence ();
1340
1341  emit_insn_before (insns, insn);
1342
1343  if (dump_file)
1344    {
1345      rtx_insn *in;
1346      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1347      for (in = insns; in != insn; in = NEXT_INSN (in))
1348	fprintf (dump_file, "%d ", INSN_UID (in));
1349      fprintf (dump_file, "\n");
1350    }
1351
1352  delete_insn (insn);
1353  return insns;
1354}
1355
1356/* Print to dump_file a description of what we're doing with shift code CODE.
1357   SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1358
1359static void
1360dump_shift_choices (enum rtx_code code, bool *splitting)
1361{
1362  int i;
1363  const char *sep;
1364
1365  fprintf (dump_file,
1366	   "  Splitting mode %s for %s lowering with shift amounts = ",
1367	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1368  sep = "";
1369  for (i = 0; i < BITS_PER_WORD; i++)
1370    if (splitting[i])
1371      {
1372	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1373	sep = ",";
1374      }
1375  fprintf (dump_file, "\n");
1376}
1377
1378/* Print to dump_file a description of what we're doing when optimizing
1379   for speed or size; SPEED_P says which.  DESCRIPTION is a description
1380   of the SPEED_P choice.  */
1381
1382static void
1383dump_choices (bool speed_p, const char *description)
1384{
1385  unsigned int i;
1386
1387  fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1388
1389  for (i = 0; i < MAX_MACHINE_MODE; i++)
1390    if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1391      fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1392	       choices[speed_p].move_modes_to_split[i]
1393	       ? "Splitting"
1394	       : "Skipping",
1395	       GET_MODE_NAME ((machine_mode) i));
1396
1397  fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1398	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1399	   GET_MODE_NAME (twice_word_mode));
1400
1401  dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1402  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1403  dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1404  fprintf (dump_file, "\n");
1405}
1406
1407/* Look for registers which are always accessed via word-sized SUBREGs
1408   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
1409   registers into several word-sized pseudo-registers.  */
1410
1411static void
1412decompose_multiword_subregs (bool decompose_copies)
1413{
1414  unsigned int max;
1415  basic_block bb;
1416  bool speed_p;
1417
1418  if (dump_file)
1419    {
1420      dump_choices (false, "size");
1421      dump_choices (true, "speed");
1422    }
1423
1424  /* Check if this target even has any modes to consider lowering.   */
1425  if (!choices[false].something_to_do && !choices[true].something_to_do)
1426    {
1427      if (dump_file)
1428	fprintf (dump_file, "Nothing to do!\n");
1429      return;
1430    }
1431
1432  max = max_reg_num ();
1433
1434  /* First see if there are any multi-word pseudo-registers.  If there
1435     aren't, there is nothing we can do.  This should speed up this
1436     pass in the normal case, since it should be faster than scanning
1437     all the insns.  */
1438  {
1439    unsigned int i;
1440    bool useful_modes_seen = false;
1441
1442    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1443      if (regno_reg_rtx[i] != NULL)
1444	{
1445	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1446	  if (choices[false].move_modes_to_split[(int) mode]
1447	      || choices[true].move_modes_to_split[(int) mode])
1448	    {
1449	      useful_modes_seen = true;
1450	      break;
1451	    }
1452	}
1453
1454    if (!useful_modes_seen)
1455      {
1456	if (dump_file)
1457	  fprintf (dump_file, "Nothing to lower in this function.\n");
1458	return;
1459      }
1460  }
1461
1462  if (df)
1463    {
1464      df_set_flags (DF_DEFER_INSN_RESCAN);
1465      run_word_dce ();
1466    }
1467
1468  /* FIXME: It may be possible to change this code to look for each
1469     multi-word pseudo-register and to find each insn which sets or
1470     uses that register.  That should be faster than scanning all the
1471     insns.  */
1472
1473  decomposable_context = BITMAP_ALLOC (NULL);
1474  non_decomposable_context = BITMAP_ALLOC (NULL);
1475  subreg_context = BITMAP_ALLOC (NULL);
1476
1477  reg_copy_graph.create (max);
1478  reg_copy_graph.safe_grow_cleared (max);
1479  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1480
1481  speed_p = optimize_function_for_speed_p (cfun);
1482  FOR_EACH_BB_FN (bb, cfun)
1483    {
1484      rtx_insn *insn;
1485
1486      FOR_BB_INSNS (bb, insn)
1487	{
1488	  rtx set;
1489	  enum classify_move_insn cmi;
1490	  int i, n;
1491
1492	  if (!INSN_P (insn)
1493	      || GET_CODE (PATTERN (insn)) == CLOBBER
1494	      || GET_CODE (PATTERN (insn)) == USE)
1495	    continue;
1496
1497	  recog_memoized (insn);
1498
1499	  if (find_decomposable_shift_zext (insn, speed_p))
1500	    continue;
1501
1502	  extract_insn (insn);
1503
1504	  set = simple_move (insn, speed_p);
1505
1506	  if (!set)
1507	    cmi = NOT_SIMPLE_MOVE;
1508	  else
1509	    {
1510	      /* We mark pseudo-to-pseudo copies as decomposable during the
1511		 second pass only.  The first pass is so early that there is
1512		 good chance such moves will be optimized away completely by
1513		 subsequent optimizations anyway.
1514
1515		 However, we call find_pseudo_copy even during the first pass
1516		 so as to properly set up the reg_copy_graph.  */
1517	      if (find_pseudo_copy (set))
1518		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1519	      else
1520		cmi = SIMPLE_MOVE;
1521	    }
1522
1523	  n = recog_data.n_operands;
1524	  for (i = 0; i < n; ++i)
1525	    {
1526	      find_decomposable_subregs (&recog_data.operand[i], &cmi);
1527
1528	      /* We handle ASM_OPERANDS as a special case to support
1529		 things like x86 rdtsc which returns a DImode value.
1530		 We can decompose the output, which will certainly be
1531		 operand 0, but not the inputs.  */
1532
1533	      if (cmi == SIMPLE_MOVE
1534		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1535		{
1536		  gcc_assert (i == 0);
1537		  cmi = NOT_SIMPLE_MOVE;
1538		}
1539	    }
1540	}
1541    }
1542
1543  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1544  if (!bitmap_empty_p (decomposable_context))
1545    {
1546      sbitmap sub_blocks;
1547      unsigned int i;
1548      sbitmap_iterator sbi;
1549      bitmap_iterator iter;
1550      unsigned int regno;
1551
1552      propagate_pseudo_copies ();
1553
1554      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1555      bitmap_clear (sub_blocks);
1556
1557      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1558	decompose_register (regno);
1559
1560      FOR_EACH_BB_FN (bb, cfun)
1561	{
1562	  rtx_insn *insn;
1563
1564	  FOR_BB_INSNS (bb, insn)
1565	    {
1566	      rtx pat;
1567
1568	      if (!INSN_P (insn))
1569		continue;
1570
1571	      pat = PATTERN (insn);
1572	      if (GET_CODE (pat) == CLOBBER)
1573		resolve_clobber (pat, insn);
1574	      else if (GET_CODE (pat) == USE)
1575		resolve_use (pat, insn);
1576	      else if (DEBUG_INSN_P (insn))
1577		resolve_debug (insn);
1578	      else
1579		{
1580		  rtx set;
1581		  int i;
1582
1583		  recog_memoized (insn);
1584		  extract_insn (insn);
1585
1586		  set = simple_move (insn, speed_p);
1587		  if (set)
1588		    {
1589		      rtx_insn *orig_insn = insn;
1590		      bool cfi = control_flow_insn_p (insn);
1591
1592		      /* We can end up splitting loads to multi-word pseudos
1593			 into separate loads to machine word size pseudos.
1594			 When this happens, we first had one load that can
1595			 throw, and after resolve_simple_move we'll have a
1596			 bunch of loads (at least two).  All those loads may
1597			 trap if we can have non-call exceptions, so they
1598			 all will end the current basic block.  We split the
1599			 block after the outer loop over all insns, but we
1600			 make sure here that we will be able to split the
1601			 basic block and still produce the correct control
1602			 flow graph for it.  */
1603		      gcc_assert (!cfi
1604				  || (cfun->can_throw_non_call_exceptions
1605				      && can_throw_internal (insn)));
1606
1607		      insn = resolve_simple_move (set, insn);
1608		      if (insn != orig_insn)
1609			{
1610			  recog_memoized (insn);
1611			  extract_insn (insn);
1612
1613			  if (cfi)
1614			    bitmap_set_bit (sub_blocks, bb->index);
1615			}
1616		    }
1617		  else
1618		    {
1619		      rtx_insn *decomposed_shift;
1620
1621		      decomposed_shift = resolve_shift_zext (insn);
1622		      if (decomposed_shift != NULL_RTX)
1623			{
1624			  insn = decomposed_shift;
1625			  recog_memoized (insn);
1626			  extract_insn (insn);
1627			}
1628		    }
1629
1630		  for (i = recog_data.n_operands - 1; i >= 0; --i)
1631		    resolve_subreg_use (recog_data.operand_loc[i], insn);
1632
1633		  resolve_reg_notes (insn);
1634
1635		  if (num_validated_changes () > 0)
1636		    {
1637		      for (i = recog_data.n_dups - 1; i >= 0; --i)
1638			{
1639			  rtx *pl = recog_data.dup_loc[i];
1640			  int dup_num = recog_data.dup_num[i];
1641			  rtx *px = recog_data.operand_loc[dup_num];
1642
1643			  validate_unshare_change (insn, pl, *px, 1);
1644			}
1645
1646		      i = apply_change_group ();
1647		      gcc_assert (i);
1648		    }
1649		}
1650	    }
1651	}
1652
1653      /* If we had insns to split that caused control flow insns in the middle
1654	 of a basic block, split those blocks now.  Note that we only handle
1655	 the case where splitting a load has caused multiple possibly trapping
1656	 loads to appear.  */
1657      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1658	{
1659	  rtx_insn *insn, *end;
1660	  edge fallthru;
1661
1662	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
1663	  insn = BB_HEAD (bb);
1664	  end = BB_END (bb);
1665
1666	  while (insn != end)
1667	    {
1668	      if (control_flow_insn_p (insn))
1669		{
1670		  /* Split the block after insn.  There will be a fallthru
1671		     edge, which is OK so we keep it.  We have to create the
1672		     exception edges ourselves.  */
1673		  fallthru = split_block (bb, insn);
1674		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
1675		  bb = fallthru->dest;
1676		  insn = BB_HEAD (bb);
1677		}
1678	      else
1679	        insn = NEXT_INSN (insn);
1680	    }
1681	}
1682
1683      sbitmap_free (sub_blocks);
1684    }
1685
1686  {
1687    unsigned int i;
1688    bitmap b;
1689
1690    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1691      if (b)
1692	BITMAP_FREE (b);
1693  }
1694
1695  reg_copy_graph.release ();
1696
1697  BITMAP_FREE (decomposable_context);
1698  BITMAP_FREE (non_decomposable_context);
1699  BITMAP_FREE (subreg_context);
1700}
1701
1702/* Implement first lower subreg pass.  */
1703
1704namespace {
1705
1706const pass_data pass_data_lower_subreg =
1707{
1708  RTL_PASS, /* type */
1709  "subreg1", /* name */
1710  OPTGROUP_NONE, /* optinfo_flags */
1711  TV_LOWER_SUBREG, /* tv_id */
1712  0, /* properties_required */
1713  0, /* properties_provided */
1714  0, /* properties_destroyed */
1715  0, /* todo_flags_start */
1716  0, /* todo_flags_finish */
1717};
1718
1719class pass_lower_subreg : public rtl_opt_pass
1720{
1721public:
1722  pass_lower_subreg (gcc::context *ctxt)
1723    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1724  {}
1725
1726  /* opt_pass methods: */
1727  virtual bool gate (function *) { return flag_split_wide_types != 0; }
1728  virtual unsigned int execute (function *)
1729    {
1730      decompose_multiword_subregs (false);
1731      return 0;
1732    }
1733
1734}; // class pass_lower_subreg
1735
1736} // anon namespace
1737
1738rtl_opt_pass *
1739make_pass_lower_subreg (gcc::context *ctxt)
1740{
1741  return new pass_lower_subreg (ctxt);
1742}
1743
1744/* Implement second lower subreg pass.  */
1745
1746namespace {
1747
1748const pass_data pass_data_lower_subreg2 =
1749{
1750  RTL_PASS, /* type */
1751  "subreg2", /* name */
1752  OPTGROUP_NONE, /* optinfo_flags */
1753  TV_LOWER_SUBREG, /* tv_id */
1754  0, /* properties_required */
1755  0, /* properties_provided */
1756  0, /* properties_destroyed */
1757  0, /* todo_flags_start */
1758  TODO_df_finish, /* todo_flags_finish */
1759};
1760
1761class pass_lower_subreg2 : public rtl_opt_pass
1762{
1763public:
1764  pass_lower_subreg2 (gcc::context *ctxt)
1765    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1766  {}
1767
1768  /* opt_pass methods: */
1769  virtual bool gate (function *) { return flag_split_wide_types != 0; }
1770  virtual unsigned int execute (function *)
1771    {
1772      decompose_multiword_subregs (true);
1773      return 0;
1774    }
1775
1776}; // class pass_lower_subreg2
1777
1778} // anon namespace
1779
1780rtl_opt_pass *
1781make_pass_lower_subreg2 (gcc::context *ctxt)
1782{
1783  return new pass_lower_subreg2 (ctxt);
1784}
1785