1/* Redundant Extension Elimination pass for the GNU compiler.
2   Copyright (C) 2010-2015 Free Software Foundation, Inc.
3   Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4
5   Based on the Redundant Zero-extension elimination pass contributed by
6   Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify it under
11the terms of the GNU General Public License as published by the Free
12Software Foundation; either version 3, or (at your option) any later
13version.
14
15GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16WARRANTY; without even the implied warranty of MERCHANTABILITY or
17FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3.  If not see
22<http://www.gnu.org/licenses/>.  */
23
24
25/* Problem Description :
26   --------------------
27   This pass is intended to remove redundant extension instructions.
28   Such instructions appear for different reasons.  We expect some of
29   them due to implicit zero-extension in 64-bit registers after writing
30   to their lower 32-bit half (e.g. for the x86-64 architecture).
31   Another possible reason is a type cast which follows a load (for
32   instance a register restore) and which can be combined into a single
33   instruction, and for which earlier local passes, e.g. the combiner,
34   weren't able to optimize.
35
36   How does this pass work  ?
37   --------------------------
38
39   This pass is run after register allocation.  Hence, all registers that
40   this pass deals with are hard registers.  This pass first looks for an
41   extension instruction that could possibly be redundant.  Such extension
42   instructions show up in RTL with the pattern  :
43   (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44   where x can be any hard register.
45   Now, this pass tries to eliminate this instruction by merging the
46   extension with the definitions of register x.  For instance, if
47   one of the definitions of register x was  :
48   (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49   followed by extension  :
50   (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51   then the combination converts this into :
52   (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53   If all the merged definitions are recognizable assembly instructions,
54   the extension is effectively eliminated.
55
56   For example, for the x86-64 architecture, implicit zero-extensions
57   are captured with appropriate patterns in the i386.md file.  Hence,
58   these merged definition can be matched to a single assembly instruction.
59   The original extension instruction is then deleted if all the
60   definitions can be merged.
61
62   However, there are cases where the definition instruction cannot be
63   merged with an extension.  Examples are CALL instructions.  In such
64   cases, the original extension is not redundant and this pass does
65   not delete it.
66
67   Handling conditional moves :
68   ----------------------------
69
70   Architectures like x86-64 support conditional moves whose semantics for
71   extension differ from the other instructions.  For instance, the
72   instruction *cmov ebx, eax*
73   zero-extends eax onto rax only when the move from ebx to eax happens.
74   Otherwise, eax may not be zero-extended.  Consider conditional moves as
75   RTL instructions of the form
76   (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77   This pass tries to merge an extension with a conditional move by
78   actually merging the definitions of y and z with an extension and then
79   converting the conditional move into :
80   (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81   Since registers y and z are extended, register x will also be extended
82   after the conditional move.  Note that this step has to be done
83   transitively since the definition of a conditional copy can be
84   another conditional copy.
85
86   Motivating Example I :
87   ---------------------
88   For this program :
89   **********************************************
90   bad_code.c
91
92   int mask[1000];
93
94   int foo(unsigned x)
95   {
96     if (x < 10)
97       x = x * 45;
98     else
99       x = x * 78;
100     return mask[x];
101   }
102   **********************************************
103
104   $ gcc -O2 bad_code.c
105     ........
106     400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107     40031a:       0f af f8                imul   %eax,%edi
108     40031d:       89 ff                   mov    %edi,%edi - useless extension
109     40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110     400326:       c3                      retq
111     ......
112     400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113     400335:       0f af fa                imul   %edx,%edi
114     400338:       89 ff                   mov    %edi,%edi - useless extension
115     40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116     400341:       c3                      retq
117
118   $ gcc -O2 -free bad_code.c
119     ......
120     400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121     400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122     40031f:       c3                      retq
123     400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124     400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125     40032a:       c3                      retq
126
127   Motivating Example II :
128   ---------------------
129
130   Here is an example with a conditional move.
131
132   For this program :
133   **********************************************
134
135   unsigned long long foo(unsigned x , unsigned y)
136   {
137     unsigned z;
138     if (x > 100)
139       z = x + y;
140     else
141       z = x - y;
142     return (unsigned long long)(z);
143   }
144
145   $ gcc -O2 bad_code.c
146     ............
147     400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148     400363:       89 f8                   mov    %edi,%eax
149     400365:       29 f0                   sub    %esi,%eax
150     400367:       83 ff 65                cmp    $0x65,%edi
151     40036a:       0f 43 c2                cmovae %edx,%eax
152     40036d:       89 c0                   mov    %eax,%eax - useless extension
153     40036f:       c3                      retq
154
155   $ gcc -O2 -free bad_code.c
156     .............
157     400360:       89 fa                   mov    %edi,%edx
158     400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159     400365:       29 f2                   sub    %esi,%edx
160     400367:       83 ff 65                cmp    $0x65,%edi
161     40036a:       89 d6                   mov    %edx,%esi
162     40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163     400370:       c3                      retq
164
165  Motivating Example III :
166  ---------------------
167
168  Here is an example with a type cast.
169
170  For this program :
171  **********************************************
172
173  void test(int size, unsigned char *in, unsigned char *out)
174  {
175    int i;
176    unsigned char xr, xg, xy=0;
177
178    for (i = 0; i < size; i++) {
179      xr = *in++;
180      xg = *in++;
181      xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182      *out++ = xy;
183    }
184  }
185
186  $ gcc -O2 bad_code.c
187    ............
188    10:   0f b6 0e                movzbl (%rsi),%ecx
189    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190    17:   48 83 c6 02             add    $0x2,%rsi
191    1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192    1e:   0f b6 c0                movzbl %al,%eax - useless extension
193    21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194    27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195
196   $ gcc -O2 -free bad_code.c
197     .............
198    10:   0f b6 0e                movzbl (%rsi),%ecx
199    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200    17:   48 83 c6 02             add    $0x2,%rsi
201    1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202    21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203
204   Usefulness :
205   ----------
206
207   The original redundant zero-extension elimination pass reported reduction
208   of the dynamic instruction count of a compression benchmark by 2.8% and
209   improvement of its run time by about 1%.
210
211   The additional performance gain with the enhanced pass is mostly expected
212   on in-order architectures where redundancy cannot be compensated by out of
213   order execution.  Measurements showed up to 10% performance gain (reduced
214   run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215   gain 1%.  */
216
217
218#include "config.h"
219#include "system.h"
220#include "coretypes.h"
221#include "tm.h"
222#include "rtl.h"
223#include "hash-set.h"
224#include "machmode.h"
225#include "vec.h"
226#include "double-int.h"
227#include "input.h"
228#include "alias.h"
229#include "symtab.h"
230#include "wide-int.h"
231#include "inchash.h"
232#include "tree.h"
233#include "tm_p.h"
234#include "flags.h"
235#include "regs.h"
236#include "hard-reg-set.h"
237#include "predict.h"
238#include "function.h"
239#include "dominance.h"
240#include "cfg.h"
241#include "cfgrtl.h"
242#include "basic-block.h"
243#include "insn-config.h"
244#include "hashtab.h"
245#include "statistics.h"
246#include "real.h"
247#include "fixed-value.h"
248#include "expmed.h"
249#include "dojump.h"
250#include "explow.h"
251#include "calls.h"
252#include "emit-rtl.h"
253#include "varasm.h"
254#include "stmt.h"
255#include "expr.h"
256#include "insn-attr.h"
257#include "recog.h"
258#include "diagnostic-core.h"
259#include "target.h"
260#include "insn-codes.h"
261#include "optabs.h"
262#include "rtlhooks-def.h"
263#include "params.h"
264#include "tree-pass.h"
265#include "df.h"
266#include "hash-map.h"
267#include "is-a.h"
268#include "plugin-api.h"
269#include "ipa-ref.h"
270#include "cgraph.h"
271
272/* This structure represents a candidate for elimination.  */
273
274typedef struct ext_cand
275{
276  /* The expression.  */
277  const_rtx expr;
278
279  /* The kind of extension.  */
280  enum rtx_code code;
281
282  /* The destination mode.  */
283  machine_mode mode;
284
285  /* The instruction where it lives.  */
286  rtx_insn *insn;
287} ext_cand;
288
289
290static int max_insn_uid;
291
292/* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
293
294static bool
295update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
296			      machine_mode old_mode, enum rtx_code code)
297{
298  rtx *loc = &REG_NOTES (insn);
299  while (*loc)
300    {
301      enum reg_note kind = REG_NOTE_KIND (*loc);
302      if (kind == REG_EQUAL || kind == REG_EQUIV)
303	{
304	  rtx orig_src = XEXP (*loc, 0);
305	  /* Update equivalency constants.  Recall that RTL constants are
306	     sign-extended.  */
307	  if (GET_CODE (orig_src) == CONST_INT
308	      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (new_mode))
309	    {
310	      if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND)
311		/* Nothing needed.  */;
312	      else
313		{
314		  /* Zero-extend the negative constant by masking out the
315		     bits outside the source mode.  */
316		  rtx new_const_int
317		    = gen_int_mode (INTVAL (orig_src)
318				    & GET_MODE_MASK (old_mode),
319				    new_mode);
320		  if (!validate_change (insn, &XEXP (*loc, 0),
321					new_const_int, true))
322		    return false;
323		}
324	      loc = &XEXP (*loc, 1);
325	    }
326	  /* Drop all other notes, they assume a wrong mode.  */
327	  else if (!validate_change (insn, loc, XEXP (*loc, 1), true))
328	    return false;
329	}
330      else
331	loc = &XEXP (*loc, 1);
332    }
333  return true;
334}
335
336/* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
337   and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
338   this code modifies the SET rtx to a new SET rtx that extends the
339   right hand expression into a register on the left hand side.  Note
340   that multiple assumptions are made about the nature of the set that
341   needs to be true for this to work and is called from merge_def_and_ext.
342
343   Original :
344   (set (reg a) (expression))
345
346   Transform :
347   (set (reg a) (any_extend (expression)))
348
349   Special Cases :
350   If the expression is a constant or another extension, then directly
351   assign it to the register.  */
352
353static bool
354combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
355{
356  rtx orig_src = SET_SRC (*orig_set);
357  machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
358  rtx new_set;
359  rtx cand_pat = PATTERN (cand->insn);
360
361  /* If the extension's source/destination registers are not the same
362     then we need to change the original load to reference the destination
363     of the extension.  Then we need to emit a copy from that destination
364     to the original destination of the load.  */
365  rtx new_reg;
366  bool copy_needed
367    = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
368  if (copy_needed)
369    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
370  else
371    new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
372
373#if 0
374  /* Rethinking test.  Temporarily disabled.  */
375  /* We're going to be widening the result of DEF_INSN, ensure that doing so
376     doesn't change the number of hard registers needed for the result.  */
377  if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
378      != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
379			   GET_MODE (SET_DEST (*orig_set))))
380	return false;
381#endif
382
383  /* Merge constants by directly moving the constant into the register under
384     some conditions.  Recall that RTL constants are sign-extended.  */
385  if (GET_CODE (orig_src) == CONST_INT
386      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
387    {
388      if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
389	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
390      else
391	{
392	  /* Zero-extend the negative constant by masking out the bits outside
393	     the source mode.  */
394	  rtx new_const_int
395	    = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode),
396			    GET_MODE (new_reg));
397	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
398	}
399    }
400  else if (GET_MODE (orig_src) == VOIDmode)
401    {
402      /* This is mostly due to a call insn that should not be optimized.  */
403      return false;
404    }
405  else if (GET_CODE (orig_src) == cand->code)
406    {
407      /* Here is a sequence of two extensions.  Try to merge them.  */
408      rtx temp_extension
409	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
410      rtx simplified_temp_extension = simplify_rtx (temp_extension);
411      if (simplified_temp_extension)
412        temp_extension = simplified_temp_extension;
413      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
414    }
415  else if (GET_CODE (orig_src) == IF_THEN_ELSE)
416    {
417      /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
418         in general, IF_THEN_ELSE should not be combined.  */
419      return false;
420    }
421  else
422    {
423      /* This is the normal case.  */
424      rtx temp_extension
425	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
426      rtx simplified_temp_extension = simplify_rtx (temp_extension);
427      if (simplified_temp_extension)
428        temp_extension = simplified_temp_extension;
429      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
430    }
431
432  /* This change is a part of a group of changes.  Hence,
433     validate_change will not try to commit the change.  */
434  if (validate_change (curr_insn, orig_set, new_set, true)
435      && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode,
436				       cand->code))
437    {
438      if (dump_file)
439        {
440          fprintf (dump_file,
441		   "Tentatively merged extension with definition %s:\n",
442		   (copy_needed) ? "(copy needed)" : "");
443          print_rtl_single (dump_file, curr_insn);
444        }
445      return true;
446    }
447
448  return false;
449}
450
451/* Treat if_then_else insns, where the operands of both branches
452   are registers, as copies.  For instance,
453   Original :
454   (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
455   Transformed :
456   (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
457   DEF_INSN is the if_then_else insn.  */
458
459static bool
460transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
461{
462  rtx set_insn = PATTERN (def_insn);
463  rtx srcreg, dstreg, srcreg2;
464  rtx map_srcreg, map_dstreg, map_srcreg2;
465  rtx ifexpr;
466  rtx cond;
467  rtx new_set;
468
469  gcc_assert (GET_CODE (set_insn) == SET);
470
471  cond = XEXP (SET_SRC (set_insn), 0);
472  dstreg = SET_DEST (set_insn);
473  srcreg = XEXP (SET_SRC (set_insn), 1);
474  srcreg2 = XEXP (SET_SRC (set_insn), 2);
475  /* If the conditional move already has the right or wider mode,
476     there is nothing to do.  */
477  if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
478    return true;
479
480  map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
481  map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
482  map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
483  ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
484  new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
485
486  if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)
487      && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg),
488				       cand->code))
489    {
490      if (dump_file)
491        {
492          fprintf (dump_file,
493		   "Mode of conditional move instruction extended:\n");
494          print_rtl_single (dump_file, def_insn);
495        }
496      return true;
497    }
498
499  return false;
500}
501
502/* Get all the reaching definitions of an instruction.  The definitions are
503   desired for REG used in INSN.  Return the definition list or NULL if a
504   definition is missing.  If DEST is non-NULL, additionally push the INSN
505   of the definitions onto DEST.  */
506
507static struct df_link *
508get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
509{
510  df_ref use;
511  struct df_link *ref_chain, *ref_link;
512
513  FOR_EACH_INSN_USE (use, insn)
514    {
515      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
516        return NULL;
517      if (REGNO (DF_REF_REG (use)) == REGNO (reg))
518	break;
519    }
520
521  gcc_assert (use != NULL);
522
523  ref_chain = DF_REF_CHAIN (use);
524
525  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
526    {
527      /* Problem getting some definition for this instruction.  */
528      if (ref_link->ref == NULL)
529        return NULL;
530      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
531        return NULL;
532    }
533
534  if (dest)
535    for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
536      dest->safe_push (DF_REF_INSN (ref_link->ref));
537
538  return ref_chain;
539}
540
541/* Return true if INSN is
542     (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
543   and store x1 and x2 in REG_1 and REG_2.  */
544
545static bool
546is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
547{
548  rtx expr = single_set (insn);
549
550  if (expr != NULL_RTX
551      && GET_CODE (expr) == SET
552      && GET_CODE (SET_DEST (expr)) == REG
553      && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
554      && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
555      && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
556    {
557      *reg1 = XEXP (SET_SRC (expr), 1);
558      *reg2 = XEXP (SET_SRC (expr), 2);
559      return true;
560    }
561
562  return false;
563}
564
565enum ext_modified_kind
566{
567  /* The insn hasn't been modified by ree pass yet.  */
568  EXT_MODIFIED_NONE,
569  /* Changed into zero extension.  */
570  EXT_MODIFIED_ZEXT,
571  /* Changed into sign extension.  */
572  EXT_MODIFIED_SEXT
573};
574
575struct ATTRIBUTE_PACKED ext_modified
576{
577  /* Mode from which ree has zero or sign extended the destination.  */
578  ENUM_BITFIELD(machine_mode) mode : 8;
579
580  /* Kind of modification of the insn.  */
581  ENUM_BITFIELD(ext_modified_kind) kind : 2;
582
583  unsigned int do_not_reextend : 1;
584
585  /* True if the insn is scheduled to be deleted.  */
586  unsigned int deleted : 1;
587};
588
589/* Vectors used by combine_reaching_defs and its helpers.  */
590typedef struct ext_state
591{
592  /* In order to avoid constant alloc/free, we keep these
593     4 vectors live through the entire find_and_remove_re and just
594     truncate them each time.  */
595  vec<rtx_insn *> defs_list;
596  vec<rtx_insn *> copies_list;
597  vec<rtx_insn *> modified_list;
598  vec<rtx_insn *> work_list;
599
600  /* For instructions that have been successfully modified, this is
601     the original mode from which the insn is extending and
602     kind of extension.  */
603  struct ext_modified *modified;
604} ext_state;
605
606/* Reaching Definitions of the extended register could be conditional copies
607   or regular definitions.  This function separates the two types into two
608   lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
609   if a reaching definition is a conditional copy, merging the extension with
610   this definition is wrong.  Conditional copies are merged by transitively
611   merging their definitions.  The defs_list is populated with all the reaching
612   definitions of the extension instruction (EXTEND_INSN) which must be merged
613   with an extension.  The copies_list contains all the conditional moves that
614   will later be extended into a wider mode conditional move if all the merges
615   are successful.  The function returns false upon failure, true upon
616   success.  */
617
618static bool
619make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat,
620			    ext_state *state)
621{
622  rtx src_reg = XEXP (SET_SRC (set_pat), 0);
623  bool *is_insn_visited;
624  bool ret = true;
625
626  state->work_list.truncate (0);
627
628  /* Initialize the work list.  */
629  if (!get_defs (extend_insn, src_reg, &state->work_list))
630    gcc_unreachable ();
631
632  is_insn_visited = XCNEWVEC (bool, max_insn_uid);
633
634  /* Perform transitive closure for conditional copies.  */
635  while (!state->work_list.is_empty ())
636    {
637      rtx_insn *def_insn = state->work_list.pop ();
638      rtx reg1, reg2;
639
640      gcc_assert (INSN_UID (def_insn) < max_insn_uid);
641
642      if (is_insn_visited[INSN_UID (def_insn)])
643	continue;
644      is_insn_visited[INSN_UID (def_insn)] = true;
645
646      if (is_cond_copy_insn (def_insn, &reg1, &reg2))
647	{
648	  /* Push it onto the copy list first.  */
649	  state->copies_list.safe_push (def_insn);
650
651	  /* Now perform the transitive closure.  */
652	  if (!get_defs (def_insn, reg1, &state->work_list)
653	      || !get_defs (def_insn, reg2, &state->work_list))
654	    {
655	      ret = false;
656	      break;
657	    }
658        }
659      else
660	state->defs_list.safe_push (def_insn);
661    }
662
663  XDELETEVEC (is_insn_visited);
664
665  return ret;
666}
667
668/* If DEF_INSN has single SET expression, possibly buried inside
669   a PARALLEL, return the address of the SET expression, else
670   return NULL.  This is similar to single_set, except that
671   single_set allows multiple SETs when all but one is dead.  */
672static rtx *
673get_sub_rtx (rtx_insn *def_insn)
674{
675  enum rtx_code code = GET_CODE (PATTERN (def_insn));
676  rtx *sub_rtx = NULL;
677
678  if (code == PARALLEL)
679    {
680      for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
681        {
682          rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
683          if (GET_CODE (s_expr) != SET)
684            continue;
685
686          if (sub_rtx == NULL)
687            sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
688          else
689            {
690              /* PARALLEL with multiple SETs.  */
691              return NULL;
692            }
693        }
694    }
695  else if (code == SET)
696    sub_rtx = &PATTERN (def_insn);
697  else
698    {
699      /* It is not a PARALLEL or a SET, what could it be ? */
700      return NULL;
701    }
702
703  gcc_assert (sub_rtx != NULL);
704  return sub_rtx;
705}
706
707/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
708   on the SET pattern.  */
709
710static bool
711merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
712{
713  machine_mode ext_src_mode;
714  rtx *sub_rtx;
715
716  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
717  sub_rtx = get_sub_rtx (def_insn);
718
719  if (sub_rtx == NULL)
720    return false;
721
722  if (REG_P (SET_DEST (*sub_rtx))
723      && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
724	  || ((state->modified[INSN_UID (def_insn)].kind
725	       == (cand->code == ZERO_EXTEND
726		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
727	      && state->modified[INSN_UID (def_insn)].mode
728		 == ext_src_mode)))
729    {
730      if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
731	  >= GET_MODE_SIZE (cand->mode))
732	return true;
733      /* If def_insn is already scheduled to be deleted, don't attempt
734	 to modify it.  */
735      if (state->modified[INSN_UID (def_insn)].deleted)
736	return false;
737      if (combine_set_extension (cand, def_insn, sub_rtx))
738	{
739	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
740	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
741	  return true;
742	}
743    }
744
745  return false;
746}
747
748/* Given SRC, which should be one or more extensions of a REG, strip
749   away the extensions and return the REG.  */
750
751static inline rtx
752get_extended_src_reg (rtx src)
753{
754  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
755    src = XEXP (src, 0);
756  gcc_assert (REG_P (src));
757  return src;
758}
759
760/* This function goes through all reaching defs of the source
761   of the candidate for elimination (CAND) and tries to combine
762   the extension with the definition instruction.  The changes
763   are made as a group so that even if one definition cannot be
764   merged, all reaching definitions end up not being merged.
765   When a conditional copy is encountered, merging is attempted
766   transitively on its definitions.  It returns true upon success
767   and false upon failure.  */
768
769static bool
770combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
771{
772  rtx_insn *def_insn;
773  bool merge_successful = true;
774  int i;
775  int defs_ix;
776  bool outcome;
777
778  state->defs_list.truncate (0);
779  state->copies_list.truncate (0);
780
781  outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
782
783  if (!outcome)
784    return false;
785
786  /* If the destination operand of the extension is a different
787     register than the source operand, then additional restrictions
788     are needed.  Note we have to handle cases where we have nested
789     extensions in the source operand.  */
790  bool copy_needed
791    = (REGNO (SET_DEST (PATTERN (cand->insn)))
792       != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
793  if (copy_needed)
794    {
795      /* Considering transformation of
796	 (set (reg1) (expression))
797	 ...
798	 (set (reg2) (any_extend (reg1)))
799
800	 into
801
802	 (set (reg2) (any_extend (expression)))
803	 (set (reg1) (reg2))
804	 ...  */
805
806      /* In theory we could handle more than one reaching def, it
807	 just makes the code to update the insn stream more complex.  */
808      if (state->defs_list.length () != 1)
809	return false;
810
811      /* We don't have the structure described above if there are
812	 conditional moves in between the def and the candidate,
813	 and we will not handle them correctly.  See PR68194.  */
814      if (state->copies_list.length () > 0)
815	return false;
816
817      /* We require the candidate not already be modified.  It may,
818	 for example have been changed from a (sign_extend (reg))
819	 into (zero_extend (sign_extend (reg))).
820
821	 Handling that case shouldn't be terribly difficult, but the code
822	 here and the code to emit copies would need auditing.  Until
823	 we see a need, this is the safe thing to do.  */
824      if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
825	return false;
826
827      machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
828      rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
829
830      /* Ensure the number of hard registers of the copy match.  */
831      if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode)
832	  != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg)))
833	return false;
834
835      /* There's only one reaching def.  */
836      rtx_insn *def_insn = state->defs_list[0];
837
838      /* The defining statement must not have been modified either.  */
839      if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
840	return false;
841
842      /* The defining statement and candidate insn must be in the same block.
843	 This is merely to keep the test for safety and updating the insn
844	 stream simple.  Also ensure that within the block the candidate
845	 follows the defining insn.  */
846      basic_block bb = BLOCK_FOR_INSN (cand->insn);
847      if (bb != BLOCK_FOR_INSN (def_insn)
848	  || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
849	return false;
850
851      /* If there is an overlap between the destination of DEF_INSN and
852	 CAND->insn, then this transformation is not safe.  Note we have
853	 to test in the widened mode.  */
854      rtx *dest_sub_rtx = get_sub_rtx (def_insn);
855      if (dest_sub_rtx == NULL
856	  || !REG_P (SET_DEST (*dest_sub_rtx)))
857	return false;
858
859      rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
860				 REGNO (SET_DEST (*dest_sub_rtx)));
861      if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
862	return false;
863
864      /* The destination register of the extension insn must not be
865	 used or set between the def_insn and cand->insn exclusive.  */
866      if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
867			      def_insn, cand->insn)
868	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
869				def_insn, cand->insn))
870	return false;
871
872      /* We must be able to copy between the two registers.   Generate,
873	 recognize and verify constraints of the copy.  Also fail if this
874	 generated more than one insn.
875
876         This generates garbage since we throw away the insn when we're
877	 done, only to recreate it later if this test was successful.
878
879	 Make sure to get the mode from the extension (cand->insn).  This
880	 is different than in the code to emit the copy as we have not
881	 modified the defining insn yet.  */
882      start_sequence ();
883      rtx pat = PATTERN (cand->insn);
884      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
885                                 REGNO (get_extended_src_reg (SET_SRC (pat))));
886      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
887                                 REGNO (SET_DEST (pat)));
888      emit_move_insn (new_dst, new_src);
889
890      rtx_insn *insn = get_insns();
891      end_sequence ();
892      if (NEXT_INSN (insn))
893	return false;
894      if (recog_memoized (insn) == -1)
895	return false;
896      extract_insn (insn);
897      if (!constrain_operands (1, get_preferred_alternatives (insn, bb)))
898	return false;
899    }
900
901
902  /* If cand->insn has been already modified, update cand->mode to a wider
903     mode if possible, or punt.  */
904  if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
905    {
906      machine_mode mode;
907      rtx set;
908
909      if (state->modified[INSN_UID (cand->insn)].kind
910	  != (cand->code == ZERO_EXTEND
911	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
912	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
913	  || (set = single_set (cand->insn)) == NULL_RTX)
914	return false;
915      mode = GET_MODE (SET_DEST (set));
916      gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
917      cand->mode = mode;
918    }
919
920  merge_successful = true;
921
922  /* Go through the defs vector and try to merge all the definitions
923     in this vector.  */
924  state->modified_list.truncate (0);
925  FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
926    {
927      if (merge_def_and_ext (cand, def_insn, state))
928	state->modified_list.safe_push (def_insn);
929      else
930        {
931          merge_successful = false;
932          break;
933        }
934    }
935
936  /* Now go through the conditional copies vector and try to merge all
937     the copies in this vector.  */
938  if (merge_successful)
939    {
940      FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
941        {
942          if (transform_ifelse (cand, def_insn))
943	    state->modified_list.safe_push (def_insn);
944          else
945            {
946              merge_successful = false;
947              break;
948            }
949        }
950    }
951
952  if (merge_successful)
953    {
954      /* Commit the changes here if possible
955	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
956	 cannot be merged, we entirely give up.  In the future, we should allow
957	 extensions to be partially eliminated along those paths where the
958	 definitions could be merged.  */
959      if (apply_change_group ())
960        {
961          if (dump_file)
962            fprintf (dump_file, "All merges were successful.\n");
963
964	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
965	    {
966	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
967	      if (modified->kind == EXT_MODIFIED_NONE)
968		modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
969						            : EXT_MODIFIED_SEXT);
970
971	      if (copy_needed)
972		modified->do_not_reextend = 1;
973	    }
974          return true;
975        }
976      else
977        {
978          /* Changes need not be cancelled explicitly as apply_change_group
979             does it.  Print list of definitions in the dump_file for debug
980             purposes.  This extension cannot be deleted.  */
981          if (dump_file)
982            {
983	      fprintf (dump_file,
984		       "Merge cancelled, non-mergeable definitions:\n");
985	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
986	        print_rtl_single (dump_file, def_insn);
987            }
988        }
989    }
990  else
991    {
992      /* Cancel any changes that have been made so far.  */
993      cancel_changes (0);
994    }
995
996  return false;
997}
998
999/* Add an extension pattern that could be eliminated.  */
1000
1001static void
1002add_removable_extension (const_rtx expr, rtx_insn *insn,
1003			 vec<ext_cand> *insn_list,
1004			 unsigned *def_map)
1005{
1006  enum rtx_code code;
1007  machine_mode mode;
1008  unsigned int idx;
1009  rtx src, dest;
1010
1011  /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
1012  if (GET_CODE (expr) != SET)
1013    return;
1014
1015  src = SET_SRC (expr);
1016  code = GET_CODE (src);
1017  dest = SET_DEST (expr);
1018  mode = GET_MODE (dest);
1019
1020  if (REG_P (dest)
1021      && (code == SIGN_EXTEND || code == ZERO_EXTEND)
1022      && REG_P (XEXP (src, 0)))
1023    {
1024      struct df_link *defs, *def;
1025      ext_cand *cand;
1026
1027      /* First, make sure we can get all the reaching definitions.  */
1028      defs = get_defs (insn, XEXP (src, 0), NULL);
1029      if (!defs)
1030	{
1031	  if (dump_file)
1032	    {
1033	      fprintf (dump_file, "Cannot eliminate extension:\n");
1034	      print_rtl_single (dump_file, insn);
1035	      fprintf (dump_file, " because of missing definition(s)\n");
1036	    }
1037	  return;
1038	}
1039
1040      /* Second, make sure the reaching definitions don't feed another and
1041	 different extension.  FIXME: this obviously can be improved.  */
1042      for (def = defs; def; def = def->next)
1043	if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
1044	    && idx != -1U
1045	    && (cand = &(*insn_list)[idx - 1])
1046	    && cand->code != code)
1047	  {
1048	    if (dump_file)
1049	      {
1050	        fprintf (dump_file, "Cannot eliminate extension:\n");
1051		print_rtl_single (dump_file, insn);
1052	        fprintf (dump_file, " because of other extension\n");
1053	      }
1054	    return;
1055	  }
1056	/* For vector mode extensions, ensure that all uses of the
1057	   XEXP (src, 0) register are the same extension (both code
1058	   and to which mode), as unlike integral extensions lowpart
1059	   subreg of the sign/zero extended register are not equal
1060	   to the original register, so we have to change all uses or
1061	   none.  */
1062	else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
1063	  {
1064	    if (idx == 0)
1065	      {
1066		struct df_link *ref_chain, *ref_link;
1067
1068		ref_chain = DF_REF_CHAIN (def->ref);
1069		for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
1070		  {
1071		    if (ref_link->ref == NULL
1072			|| DF_REF_INSN_INFO (ref_link->ref) == NULL)
1073		      {
1074			idx = -1U;
1075			break;
1076		      }
1077		    rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
1078		    const_rtx use_set;
1079		    if (use_insn == insn || DEBUG_INSN_P (use_insn))
1080		      continue;
1081		    if (!(use_set = single_set (use_insn))
1082			|| !REG_P (SET_DEST (use_set))
1083			|| GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
1084			|| GET_CODE (SET_SRC (use_set)) != code
1085			|| !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
1086					 XEXP (src, 0)))
1087		      {
1088			idx = -1U;
1089			break;
1090		      }
1091		  }
1092		if (idx == -1U)
1093		  def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1094	      }
1095	    if (idx == -1U)
1096	      {
1097		if (dump_file)
1098		  {
1099		    fprintf (dump_file, "Cannot eliminate extension:\n");
1100		    print_rtl_single (dump_file, insn);
1101		    fprintf (dump_file,
1102			     " because some vector uses aren't extension\n");
1103		  }
1104		return;
1105	      }
1106	  }
1107
1108      /* Then add the candidate to the list and insert the reaching definitions
1109         into the definition map.  */
1110      ext_cand e = {expr, code, mode, insn};
1111      insn_list->safe_push (e);
1112      idx = insn_list->length ();
1113
1114      for (def = defs; def; def = def->next)
1115	def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
1116    }
1117}
1118
1119/* Traverse the instruction stream looking for extensions and return the
1120   list of candidates.  */
1121
1122static vec<ext_cand>
1123find_removable_extensions (void)
1124{
1125  vec<ext_cand> insn_list = vNULL;
1126  basic_block bb;
1127  rtx_insn *insn;
1128  rtx set;
1129  unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
1130
1131  FOR_EACH_BB_FN (bb, cfun)
1132    FOR_BB_INSNS (bb, insn)
1133      {
1134	if (!NONDEBUG_INSN_P (insn))
1135	  continue;
1136
1137	set = single_set (insn);
1138	if (set == NULL_RTX)
1139	  continue;
1140	add_removable_extension (set, insn, &insn_list, def_map);
1141      }
1142
1143  XDELETEVEC (def_map);
1144
1145  return insn_list;
1146}
1147
1148/* This is the main function that checks the insn stream for redundant
1149   extensions and tries to remove them if possible.  */
1150
1151static void
1152find_and_remove_re (void)
1153{
1154  ext_cand *curr_cand;
1155  rtx_insn *curr_insn = NULL;
1156  int num_re_opportunities = 0, num_realized = 0, i;
1157  vec<ext_cand> reinsn_list;
1158  auto_vec<rtx_insn *> reinsn_del_list;
1159  auto_vec<rtx_insn *> reinsn_copy_list;
1160  ext_state state;
1161
1162  /* Construct DU chain to get all reaching definitions of each
1163     extension instruction.  */
1164  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
1165  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
1166  df_analyze ();
1167  df_set_flags (DF_DEFER_INSN_RESCAN);
1168
1169  max_insn_uid = get_max_uid ();
1170  reinsn_list = find_removable_extensions ();
1171  state.defs_list.create (0);
1172  state.copies_list.create (0);
1173  state.modified_list.create (0);
1174  state.work_list.create (0);
1175  if (reinsn_list.is_empty ())
1176    state.modified = NULL;
1177  else
1178    state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
1179
1180  FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
1181    {
1182      num_re_opportunities++;
1183
1184      /* Try to combine the extension with the definition.  */
1185      if (dump_file)
1186        {
1187          fprintf (dump_file, "Trying to eliminate extension:\n");
1188          print_rtl_single (dump_file, curr_cand->insn);
1189        }
1190
1191      if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
1192        {
1193          if (dump_file)
1194            fprintf (dump_file, "Eliminated the extension.\n");
1195          num_realized++;
1196	  /* If the RHS of the current candidate is not (extend (reg)), then
1197	     we do not allow the optimization of extensions where
1198	     the source and destination registers do not match.  Thus
1199	     checking REG_P here is correct.  */
1200	  if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
1201	      && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
1202		  != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
1203	    {
1204              reinsn_copy_list.safe_push (curr_cand->insn);
1205              reinsn_copy_list.safe_push (state.defs_list[0]);
1206	    }
1207	  reinsn_del_list.safe_push (curr_cand->insn);
1208	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
1209        }
1210    }
1211
1212  /* The copy list contains pairs of insns which describe copies we
1213     need to insert into the INSN stream.
1214
1215     The first insn in each pair is the extension insn, from which
1216     we derive the source and destination of the copy.
1217
1218     The second insn in each pair is the memory reference where the
1219     extension will ultimately happen.  We emit the new copy
1220     immediately after this insn.
1221
1222     It may first appear that the arguments for the copy are reversed.
1223     Remember that the memory reference will be changed to refer to the
1224     destination of the extention.  So we're actually emitting a copy
1225     from the new destination to the old destination.  */
1226  for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
1227    {
1228      rtx_insn *curr_insn = reinsn_copy_list[i];
1229      rtx_insn *def_insn = reinsn_copy_list[i + 1];
1230
1231      /* Use the mode of the destination of the defining insn
1232	 for the mode of the copy.  This is necessary if the
1233	 defining insn was used to eliminate a second extension
1234	 that was wider than the first.  */
1235      rtx sub_rtx = *get_sub_rtx (def_insn);
1236      rtx pat = PATTERN (curr_insn);
1237      rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1238				 REGNO (XEXP (SET_SRC (pat), 0)));
1239      rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1240				 REGNO (SET_DEST (pat)));
1241      rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
1242      emit_insn_after (set, def_insn);
1243    }
1244
1245  /* Delete all useless extensions here in one sweep.  */
1246  FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
1247    delete_insn (curr_insn);
1248
1249  reinsn_list.release ();
1250  state.defs_list.release ();
1251  state.copies_list.release ();
1252  state.modified_list.release ();
1253  state.work_list.release ();
1254  XDELETEVEC (state.modified);
1255
1256  if (dump_file && num_re_opportunities > 0)
1257    fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
1258	     num_re_opportunities, num_realized);
1259}
1260
1261/* Find and remove redundant extensions.  */
1262
1263static unsigned int
1264rest_of_handle_ree (void)
1265{
1266  timevar_push (TV_REE);
1267  find_and_remove_re ();
1268  timevar_pop (TV_REE);
1269  return 0;
1270}
1271
1272namespace {
1273
1274const pass_data pass_data_ree =
1275{
1276  RTL_PASS, /* type */
1277  "ree", /* name */
1278  OPTGROUP_NONE, /* optinfo_flags */
1279  TV_REE, /* tv_id */
1280  0, /* properties_required */
1281  0, /* properties_provided */
1282  0, /* properties_destroyed */
1283  0, /* todo_flags_start */
1284  TODO_df_finish, /* todo_flags_finish */
1285};
1286
1287class pass_ree : public rtl_opt_pass
1288{
1289public:
1290  pass_ree (gcc::context *ctxt)
1291    : rtl_opt_pass (pass_data_ree, ctxt)
1292  {}
1293
1294  /* opt_pass methods: */
1295  virtual bool gate (function *) { return (optimize > 0 && flag_ree); }
1296  virtual unsigned int execute (function *) { return rest_of_handle_ree (); }
1297
1298}; // class pass_ree
1299
1300} // anon namespace
1301
1302rtl_opt_pass *
1303make_pass_ree (gcc::context *ctxt)
1304{
1305  return new pass_ree (ctxt);
1306}
1307