1/* Output routines for GCC for Hitachi Super-H.
2   Copyright (C) 1993-1998 Free Software Foundation, Inc.
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING.  If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA.  */
20
21/* Contributed by Steve Chamberlain (sac@cygnus.com).
22   Improved by Jim Wilson (wilson@cygnus.com).  */
23
24#include "config.h"
25
26#include <stdio.h>
27
28#include "rtl.h"
29#include "tree.h"
30#include "flags.h"
31#include "insn-flags.h"
32#include "expr.h"
33#include "regs.h"
34#include "hard-reg-set.h"
35#include "output.h"
36#include "insn-attr.h"
37
38int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
39
40#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
41#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
42
43/* ??? The pragma interrupt support will not work for SH3.  */
44/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
45   output code for the next function appropriate for an interrupt handler.  */
46int pragma_interrupt;
47
48/* This is set by the trap_exit attribute for functions.   It specifies
49   a trap number to be used in a trapa instruction at function exit
50   (instead of an rte instruction).  */
51int trap_exit;
52
53/* This is used by the sp_switch attribute for functions.  It specifies
54   a variable holding the address of the stack the interrupt function
55   should switch to/from at entry/exit.  */
56rtx sp_switch;
57
58/* This is set by #pragma trapa, and is similar to the above, except that
59   the compiler doesn't emit code to preserve all registers.  */
60static int pragma_trapa;
61
62/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
63   which has a separate set of low regs for User and Supervisor modes.
64   This should only be used for the lowest level of interrupts.  Higher levels
65   of interrupts must save the registers in case they themselves are
66   interrupted.  */
67int pragma_nosave_low_regs;
68
69/* This is used for communication between SETUP_INCOMING_VARARGS and
70   sh_expand_prologue.  */
71int current_function_anonymous_args;
72
73/* Global variables from toplev.c and final.c that are used within, but
74   not declared in any header file.  */
75extern char *version_string;
76extern int *insn_addresses;
77
78/* Global variables for machine-dependent things. */
79
80/* Which cpu are we scheduling for.  */
81enum processor_type sh_cpu;
82
83/* Saved operands from the last compare to use when we generate an scc
84   or bcc insn.  */
85
86rtx sh_compare_op0;
87rtx sh_compare_op1;
88
89enum machine_mode sh_addr_diff_vec_mode;
90
91/* Provides the class number of the smallest class containing
92   reg number.  */
93
94int regno_reg_class[FIRST_PSEUDO_REGISTER] =
95{
96  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
97  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
98  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
99  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
100  GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
101  MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
102  FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
103  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
104  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
105  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
106  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
107  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
108  FPSCR_REGS,
109};
110
111char fp_reg_names[][5] =
112{
113  "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
114  "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
115  "fpul",
116  "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
117};
118
119/* Provide reg_class from a letter such as appears in the machine
120   description.  */
121
122enum reg_class reg_class_from_letter[] =
123{
124  /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
125  /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
126  /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
127  /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
128  /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
129  /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
130  /* y */ FPUL_REGS, /* z */ R0_REGS
131};
132
133int assembler_dialect;
134
135rtx get_fpscr_rtx ();
136void emit_sf_insn ();
137void emit_df_insn ();
138
139static void split_branches PROTO ((rtx));
140
141/* Print the operand address in x to the stream.  */
142
143void
144print_operand_address (stream, x)
145     FILE *stream;
146     rtx x;
147{
148  switch (GET_CODE (x))
149    {
150    case REG:
151    case SUBREG:
152      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
153      break;
154
155    case PLUS:
156      {
157	rtx base = XEXP (x, 0);
158	rtx index = XEXP (x, 1);
159
160	switch (GET_CODE (index))
161	  {
162	  case CONST_INT:
163	    fprintf (stream, "@(%d,%s)", INTVAL (index),
164		     reg_names[true_regnum (base)]);
165	    break;
166
167	  case REG:
168	  case SUBREG:
169	    {
170	      int base_num = true_regnum (base);
171	      int index_num = true_regnum (index);
172
173	      fprintf (stream, "@(r0,%s)",
174		       reg_names[MAX (base_num, index_num)]);
175	      break;
176	    }
177
178	  default:
179	    debug_rtx (x);
180	    abort ();
181	  }
182      }
183      break;
184
185    case PRE_DEC:
186      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
187      break;
188
189    case POST_INC:
190      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
191      break;
192
193    default:
194      output_addr_const (stream, x);
195      break;
196    }
197}
198
199/* Print operand x (an rtx) in assembler syntax to file stream
200   according to modifier code.
201
202   '.'  print a .s if insn needs delay slot
203   ','  print LOCAL_LABEL_PREFIX
204   '@'  print trap, rte or rts depending upon pragma interruptness
205   '#'  output a nop if there is nothing to put in the delay slot
206   'O'  print a constant without the #
207   'R'  print the LSW of a dp value - changes if in little endian
208   'S'  print the MSW of a dp value - changes if in little endian
209   'T'  print the next word of a dp value - same as 'R' in big endian mode.
210   'o'  output an operator.  */
211
212void
213print_operand (stream, x, code)
214     FILE *stream;
215     rtx x;
216     int code;
217{
218  switch (code)
219    {
220    case '.':
221      if (final_sequence
222	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
223	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
224      break;
225    case ',':
226      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
227      break;
228    case '@':
229      {
230	int interrupt_handler;
231
232	if ((lookup_attribute
233	     ("interrupt_handler",
234	      DECL_MACHINE_ATTRIBUTES (current_function_decl)))
235	    != NULL_TREE)
236	  interrupt_handler = 1;
237	else
238	  interrupt_handler = 0;
239
240      if (trap_exit)
241	fprintf (stream, "trapa #%d", trap_exit);
242      else if (interrupt_handler)
243	fprintf (stream, "rte");
244      else
245	fprintf (stream, "rts");
246      break;
247      }
248    case '#':
249      /* Output a nop if there's nothing in the delay slot.  */
250      if (dbr_sequence_length () == 0)
251	fprintf (stream, "\n\tnop");
252      break;
253    case 'O':
254      output_addr_const (stream, x);
255      break;
256    case 'R':
257      fputs (reg_names[REGNO (x) + LSW], (stream));
258      break;
259    case 'S':
260      fputs (reg_names[REGNO (x) + MSW], (stream));
261      break;
262    case 'T':
263      /* Next word of a double.  */
264      switch (GET_CODE (x))
265	{
266	case REG:
267	  fputs (reg_names[REGNO (x) + 1], (stream));
268	  break;
269	case MEM:
270	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
271	      && GET_CODE (XEXP (x, 0)) != POST_INC)
272	    x = adj_offsettable_operand (x, 4);
273	  print_operand_address (stream, XEXP (x, 0));
274	  break;
275	}
276      break;
277    case 'o':
278      switch (GET_CODE (x))
279	{
280	case PLUS:  fputs ("add", stream); break;
281	case MINUS: fputs ("sub", stream); break;
282	case MULT:  fputs ("mul", stream); break;
283	case DIV:   fputs ("div", stream); break;
284	}
285      break;
286    default:
287      switch (GET_CODE (x))
288	{
289	case REG:
290	  if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG
291	      && GET_MODE_SIZE (GET_MODE (x)) > 4)
292	    fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
293	  else
294	    fputs (reg_names[REGNO (x)], (stream));
295	  break;
296	case MEM:
297	  output_address (XEXP (x, 0));
298	  break;
299	default:
300	  fputc ('#', stream);
301	  output_addr_const (stream, x);
302	  break;
303	}
304      break;
305    }
306}
307
308static void force_into PROTO ((rtx, rtx));
309
310/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
311static void
312force_into (value, target)
313     rtx value, target;
314{
315  value = force_operand (value, target);
316  if (! rtx_equal_p (value, target))
317    emit_insn (gen_move_insn (target, value));
318}
319
320/* Emit code to perform a block move.  Choose the best method.
321
322   OPERANDS[0] is the destination.
323   OPERANDS[1] is the source.
324   OPERANDS[2] is the size.
325   OPERANDS[3] is the alignment safe to use.  */
326
327int
328expand_block_move (operands)
329     rtx *operands;
330{
331  int align = INTVAL (operands[3]);
332  int constp = (GET_CODE (operands[2]) == CONST_INT);
333  int bytes = (constp ? INTVAL (operands[2]) : 0);
334
335  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
336     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
337  if (! constp || align < 4 || (bytes % 4 != 0))
338    return 0;
339
340  if (TARGET_HARD_SH4)
341    {
342      if (bytes < 12)
343	return 0;
344      else if (bytes == 12)
345	{
346	  tree entry_name;
347	  rtx func_addr_rtx;
348	  rtx r4 = gen_rtx (REG, SImode, 4);
349	  rtx r5 = gen_rtx (REG, SImode, 5);
350
351	  entry_name = get_identifier ("__movstrSI12_i4");
352
353	  func_addr_rtx
354	    = copy_to_mode_reg (Pmode,
355				gen_rtx_SYMBOL_REF (Pmode,
356						    IDENTIFIER_POINTER (entry_name)));
357	  force_into (XEXP (operands[0], 0), r4);
358	  force_into (XEXP (operands[1], 0), r5);
359	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
360	  return 1;
361	}
362      else if (! TARGET_SMALLCODE)
363	{
364	  tree entry_name;
365	  rtx func_addr_rtx;
366	  int dwords;
367	  rtx r4 = gen_rtx (REG, SImode, 4);
368	  rtx r5 = gen_rtx (REG, SImode, 5);
369	  rtx r6 = gen_rtx (REG, SImode, 6);
370
371	  entry_name = get_identifier (bytes & 4
372				       ? "__movstr_i4_odd"
373				       : "__movstr_i4_even");
374	  func_addr_rtx
375	    = copy_to_mode_reg (Pmode,
376				gen_rtx_SYMBOL_REF (Pmode,
377						    IDENTIFIER_POINTER (entry_name)));
378	  force_into (XEXP (operands[0], 0), r4);
379	  force_into (XEXP (operands[1], 0), r5);
380
381	  dwords = bytes >> 3;
382	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
383	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
384	  return 1;
385	}
386      else
387	return 0;
388    }
389  if (bytes < 64)
390    {
391      char entry[30];
392      tree entry_name;
393      rtx func_addr_rtx;
394      rtx r4 = gen_rtx (REG, SImode, 4);
395      rtx r5 = gen_rtx (REG, SImode, 5);
396
397      sprintf (entry, "__movstrSI%d", bytes);
398      entry_name = get_identifier (entry);
399
400      func_addr_rtx
401	= copy_to_mode_reg (Pmode,
402			    gen_rtx (SYMBOL_REF, Pmode,
403				     IDENTIFIER_POINTER (entry_name)));
404      force_into (XEXP (operands[0], 0), r4);
405      force_into (XEXP (operands[1], 0), r5);
406      emit_insn (gen_block_move_real (func_addr_rtx));
407      return 1;
408    }
409
410  /* This is the same number of bytes as a memcpy call, but to a different
411     less common function name, so this will occasionally use more space.  */
412  if (! TARGET_SMALLCODE)
413    {
414      tree entry_name;
415      rtx func_addr_rtx;
416      int final_switch, while_loop;
417      rtx r4 = gen_rtx (REG, SImode, 4);
418      rtx r5 = gen_rtx (REG, SImode, 5);
419      rtx r6 = gen_rtx (REG, SImode, 6);
420
421      entry_name = get_identifier ("__movstr");
422      func_addr_rtx
423	= copy_to_mode_reg (Pmode,
424			    gen_rtx (SYMBOL_REF, Pmode,
425				     IDENTIFIER_POINTER (entry_name)));
426      force_into (XEXP (operands[0], 0), r4);
427      force_into (XEXP (operands[1], 0), r5);
428
429      /* r6 controls the size of the move.  16 is decremented from it
430	 for each 64 bytes moved.  Then the negative bit left over is used
431	 as an index into a list of move instructions.  e.g., a 72 byte move
432	 would be set up with size(r6) = 14, for one iteration through the
433	 big while loop, and a switch of -2 for the last part.  */
434
435      final_switch = 16 - ((bytes / 4) % 16);
436      while_loop = ((bytes / 4) / 16 - 1) * 16;
437      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
438      emit_insn (gen_block_lump_real (func_addr_rtx));
439      return 1;
440    }
441
442  return 0;
443}
444
445/* Prepare operands for a move define_expand; specifically, one of the
446   operands must be in a register.  */
447
448int
449prepare_move_operands (operands, mode)
450     rtx operands[];
451     enum machine_mode mode;
452{
453  if (! reload_in_progress && ! reload_completed)
454    {
455      /* Copy the source to a register if both operands aren't registers.  */
456      if (! register_operand (operands[0], mode)
457	  && ! register_operand (operands[1], mode))
458	operands[1] = copy_to_mode_reg (mode, operands[1]);
459
460      /* This case can happen while generating code to move the result
461	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
462	 reload will fail to find a spill register for rX, since r0 is already
463	 being used for the source.  */
464      else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
465	       && GET_CODE (operands[0]) == MEM
466	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
467	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
468	operands[1] = copy_to_mode_reg (mode, operands[1]);
469    }
470
471  return 0;
472}
473
474/* Prepare the operands for an scc instruction; make sure that the
475   compare has been done.  */
476rtx
477prepare_scc_operands (code)
478     enum rtx_code code;
479{
480  rtx t_reg = gen_rtx (REG, SImode, T_REG);
481  enum rtx_code oldcode = code;
482  enum machine_mode mode;
483
484  /* First need a compare insn.  */
485  switch (code)
486    {
487    case NE:
488      /* It isn't possible to handle this case.  */
489      abort ();
490    case LT:
491      code = GT;
492      break;
493    case LE:
494      code = GE;
495      break;
496    case LTU:
497      code = GTU;
498      break;
499    case LEU:
500      code = GEU;
501      break;
502    }
503  if (code != oldcode)
504    {
505      rtx tmp = sh_compare_op0;
506      sh_compare_op0 = sh_compare_op1;
507      sh_compare_op1 = tmp;
508    }
509
510  mode = GET_MODE (sh_compare_op0);
511  if (mode == VOIDmode)
512    mode = GET_MODE (sh_compare_op1);
513
514  sh_compare_op0 = force_reg (mode, sh_compare_op0);
515  if ((code != EQ && code != NE
516       && (sh_compare_op1 != const0_rtx
517	   || code == GTU  || code == GEU || code == LTU || code == LEU))
518      || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
519    sh_compare_op1 = force_reg (mode, sh_compare_op1);
520
521  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
522    (mode == SFmode ? emit_sf_insn : emit_df_insn)
523     (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
524		gen_rtx (SET, VOIDmode, t_reg,
525			 gen_rtx (code, SImode,
526				  sh_compare_op0, sh_compare_op1)),
527		gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
528  else
529    emit_insn (gen_rtx (SET, VOIDmode, t_reg,
530			gen_rtx (code, SImode, sh_compare_op0,
531				 sh_compare_op1)));
532
533  return t_reg;
534}
535
536/* Called from the md file, set up the operands of a compare instruction.  */
537
538void
539from_compare (operands, code)
540     rtx *operands;
541     int code;
542{
543  enum machine_mode mode = GET_MODE (sh_compare_op0);
544  rtx insn;
545  if (mode == VOIDmode)
546    mode = GET_MODE (sh_compare_op1);
547  if (code != EQ
548      || mode == DImode
549      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
550    {
551      /* Force args into regs, since we can't use constants here.  */
552      sh_compare_op0 = force_reg (mode, sh_compare_op0);
553      if (sh_compare_op1 != const0_rtx
554	  || code == GTU  || code == GEU
555	  || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
556	sh_compare_op1 = force_reg (mode, sh_compare_op1);
557    }
558  if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
559    {
560      from_compare (operands, GT);
561      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
562    }
563  else
564    insn = gen_rtx (SET, VOIDmode,
565		    gen_rtx (REG, SImode, 18),
566		    gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
567  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
568    {
569      insn = gen_rtx (PARALLEL, VOIDmode,
570		      gen_rtvec (2, insn,
571				 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
572      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
573    }
574  else
575    emit_insn (insn);
576}
577
578/* Functions to output assembly code.  */
579
580/* Return a sequence of instructions to perform DI or DF move.
581
582   Since the SH cannot move a DI or DF in one instruction, we have
583   to take care when we see overlapping source and dest registers.  */
584
585char *
586output_movedouble (insn, operands, mode)
587     rtx insn;
588     rtx operands[];
589     enum machine_mode mode;
590{
591  rtx dst = operands[0];
592  rtx src = operands[1];
593
594  if (GET_CODE (dst) == MEM
595      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
596    return "mov.l	%T1,%0\n\tmov.l	%1,%0";
597
598  if (register_operand (dst, mode)
599      && register_operand (src, mode))
600    {
601      if (REGNO (src) == MACH_REG)
602	return "sts	mach,%S0\n\tsts	macl,%R0";
603
604      /* When mov.d r1,r2 do r2->r3 then r1->r2;
605         when mov.d r1,r0 do r1->r0 then r2->r1.  */
606
607      if (REGNO (src) + 1 == REGNO (dst))
608	return "mov	%T1,%T0\n\tmov	%1,%0";
609      else
610	return "mov	%1,%0\n\tmov	%T1,%T0";
611    }
612  else if (GET_CODE (src) == CONST_INT)
613    {
614      if (INTVAL (src) < 0)
615	output_asm_insn ("mov	#-1,%S0", operands);
616      else
617	output_asm_insn ("mov	#0,%S0", operands);
618
619      return "mov	%1,%R0";
620    }
621  else if (GET_CODE (src) == MEM)
622    {
623      int ptrreg = -1;
624      int dreg = REGNO (dst);
625      rtx inside = XEXP (src, 0);
626
627      if (GET_CODE (inside) == REG)
628	ptrreg = REGNO (inside);
629      else if (GET_CODE (inside) == SUBREG)
630	ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside);
631      else if (GET_CODE (inside) == PLUS)
632	{
633	  ptrreg = REGNO (XEXP (inside, 0));
634	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
635	     an offsettable address.  Unfortunately, offsettable addresses use
636	     QImode to check the offset, and a QImode offsettable address
637	     requires r0 for the other operand, which is not currently
638	     supported, so we can't use the 'o' constraint.
639	     Thus we must check for and handle r0+REG addresses here.
640	     We punt for now, since this is likely very rare.  */
641	  if (GET_CODE (XEXP (inside, 1)) == REG)
642	    abort ();
643	}
644      else if (GET_CODE (inside) == LABEL_REF)
645	return "mov.l	%1,%0\n\tmov.l	%1+4,%T0";
646      else if (GET_CODE (inside) == POST_INC)
647	return "mov.l	%1,%0\n\tmov.l	%1,%T0";
648      else
649	abort ();
650
651      /* Work out the safe way to copy.  Copy into the second half first.  */
652      if (dreg == ptrreg)
653	return "mov.l	%T1,%T0\n\tmov.l	%1,%0";
654    }
655
656  return "mov.l	%1,%0\n\tmov.l	%T1,%T0";
657}
658
659/* Print an instruction which would have gone into a delay slot after
660   another instruction, but couldn't because the other instruction expanded
661   into a sequence where putting the slot insn at the end wouldn't work.  */
662
663static void
664print_slot (insn)
665     rtx insn;
666{
667  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
668
669  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
670}
671
672char *
673output_far_jump (insn, op)
674     rtx insn;
675     rtx op;
676{
677  struct { rtx lab, reg, op; } this;
678  char *jump;
679  int far;
680  int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)];
681
682  this.lab = gen_label_rtx ();
683
684  if (TARGET_SH2
685      && offset >= -32764
686      && offset - get_attr_length (insn) <= 32766)
687    {
688      far = 0;
689      jump = "mov.w	%O0,%1;braf	%1";
690    }
691  else
692    {
693      far = 1;
694      jump = "mov.l	%O0,%1;jmp	@%1";
695    }
696  /* If we have a scratch register available, use it.  */
697  if (GET_CODE (PREV_INSN (insn)) == INSN
698      && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
699    {
700      this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
701      output_asm_insn (jump, &this.lab);
702      if (dbr_sequence_length ())
703	print_slot (final_sequence);
704      else
705	output_asm_insn ("nop", 0);
706    }
707  else
708    {
709      /* Output the delay slot insn first if any.  */
710      if (dbr_sequence_length ())
711	print_slot (final_sequence);
712
713      this.reg = gen_rtx (REG, SImode, 13);
714      output_asm_insn ("mov.l	r13,@-r15", 0);
715      output_asm_insn (jump, &this.lab);
716      output_asm_insn ("mov.l	@r15+,r13", 0);
717    }
718  if (far)
719    output_asm_insn (".align	2", 0);
720  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
721  this.op = op;
722  output_asm_insn (far ? ".long	%O2" : ".word %O2-%O0", &this.lab);
723  return "";
724}
725
726/* Local label counter, used for constants in the pool and inside
727   pattern branches.  */
728
729static int lf = 100;
730
731/* Output code for ordinary branches.  */
732
733char *
734output_branch (logic, insn, operands)
735     int logic;
736     rtx insn;
737     rtx *operands;
738{
739  switch (get_attr_length (insn))
740    {
741    case 6:
742      /* This can happen if filling the delay slot has caused a forward
743	 branch to exceed its range (we could reverse it, but only
744	 when we know we won't overextend other branches; this should
745	 best be handled by relaxation).
746	 It can also happen when other condbranches hoist delay slot insn
747	 from their destination, thus leading to code size increase.
748	 But the branch will still be in the range -4092..+4098 bytes.  */
749
750      if (! TARGET_RELAX)
751	{
752	  int label = lf++;
753	  /* The call to print_slot will clobber the operands.  */
754	  rtx op0 = operands[0];
755
756	  /* If the instruction in the delay slot is annulled (true), then
757	     there is no delay slot where we can put it now.  The only safe
758	     place for it is after the label.  final will do that by default.  */
759
760	  if (final_sequence
761	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
762	    {
763	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
764	                   ASSEMBLER_DIALECT ? "/" : ".", label);
765	      print_slot (final_sequence);
766	    }
767	  else
768	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
769
770	  output_asm_insn ("bra\t%l0", &op0);
771	  fprintf (asm_out_file, "\tnop\n");
772	  ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
773
774	  return "";
775	}
776      /* When relaxing, handle this like a short branch.  The linker
777	 will fix it up if it still doesn't fit after relaxation.  */
778    case 2:
779      return logic ? "bt%.\t%l0" : "bf%.\t%l0";
780    default:
781      abort ();
782    }
783}
784
785char *
786output_branchy_insn (code, template, insn, operands)
787     char *template;
788     enum rtx_code code;
789     rtx insn;
790     rtx *operands;
791{
792  rtx next_insn = NEXT_INSN (insn);
793  int label_nr;
794
795  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
796    {
797      rtx src = SET_SRC (PATTERN (next_insn));
798      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
799	{
800	  /* Following branch not taken */
801	  operands[9] = gen_label_rtx ();
802	  emit_label_after (operands[9], next_insn);
803	  return template;
804	}
805      else
806	{
807	  int offset = (branch_dest (next_insn)
808			- insn_addresses[INSN_UID (next_insn)] + 4);
809	  if (offset >= -252 && offset <= 258)
810	    {
811	      if (GET_CODE (src) == IF_THEN_ELSE)
812		/* branch_true */
813		src = XEXP (src, 1);
814	      operands[9] = src;
815	      return template;
816	    }
817	}
818    }
819  operands[9] = gen_label_rtx ();
820  emit_label_after (operands[9], insn);
821  return template;
822}
823
824char *
825output_ieee_ccmpeq (insn, operands)
826     rtx insn, operands;
827{
828  output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
829}
830
831/* Output to FILE the start of the assembler file.  */
832
833void
834output_file_start (file)
835     FILE *file;
836{
837  register int pos;
838
839  output_file_directive (file, main_input_filename);
840
841  /* Switch to the data section so that the coffsem symbol and the
842     gcc2_compiled. symbol aren't in the text section.  */
843  data_section ();
844
845  if (TARGET_LITTLE_ENDIAN)
846    fprintf (file, "\t.little\n");
847}
848
849/* Actual number of instructions used to make a shift by N.  */
850static char ashiftrt_insns[] =
851  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
852
853/* Left shift and logical right shift are the same.  */
854static char shift_insns[]    =
855  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
856
857/* Individual shift amounts needed to get the above length sequences.
858   One bit right shifts clobber the T bit, so when possible, put one bit
859   shifts in the middle of the sequence, so the ends are eligible for
860   branch delay slots.  */
861static short shift_amounts[32][5] = {
862  {0}, {1}, {2}, {2, 1},
863  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
864  {8}, {8, 1}, {8, 2}, {8, 1, 2},
865  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
866  {16}, {16, 1}, {16, 2}, {16, 1, 2},
867  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
868  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
869  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
870
871/* Likewise, but for shift amounts < 16, up to three highmost bits
872   might be clobbered.  This is typically used when combined with some
873   kind of sign or zero extension.  */
874
875static char ext_shift_insns[]    =
876  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
877
878static short ext_shift_amounts[32][4] = {
879  {0}, {1}, {2}, {2, 1},
880  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
881  {8}, {8, 1}, {8, 2}, {8, 1, 2},
882  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
883  {16}, {16, 1}, {16, 2}, {16, 1, 2},
884  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
885  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
886  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
887
888/* Assuming we have a value that has been sign-extended by at least one bit,
889   can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
890   to shift it by N without data loss, and quicker than by other means?  */
891#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
892
893/* This is used in length attributes in sh.md to help compute the length
894   of arbitrary constant shift instructions.  */
895
896int
897shift_insns_rtx (insn)
898     rtx insn;
899{
900  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
901  int shift_count = INTVAL (XEXP (set_src, 1));
902  enum rtx_code shift_code = GET_CODE (set_src);
903
904  switch (shift_code)
905    {
906    case ASHIFTRT:
907      return ashiftrt_insns[shift_count];
908    case LSHIFTRT:
909    case ASHIFT:
910      return shift_insns[shift_count];
911    default:
912      abort();
913    }
914}
915
916/* Return the cost of a shift.  */
917
918int
919shiftcosts (x)
920     rtx x;
921{
922  int value = INTVAL (XEXP (x, 1));
923
924  /* If shift by a non constant, then this will be expensive.  */
925  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
926    return SH_DYNAMIC_SHIFT_COST;
927
928  /* Otherwise, return the true cost in instructions.  */
929  if (GET_CODE (x) == ASHIFTRT)
930    {
931      int cost = ashiftrt_insns[value];
932      /* If SH3, then we put the constant in a reg and use shad.  */
933      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
934	cost = 1 + SH_DYNAMIC_SHIFT_COST;
935      return cost;
936    }
937  else
938    return shift_insns[value];
939}
940
941/* Return the cost of an AND operation.  */
942
943int
944andcosts (x)
945     rtx x;
946{
947  int i;
948
949  /* Anding with a register is a single cycle and instruction.  */
950  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
951    return 1;
952
953  i = INTVAL (XEXP (x, 1));
954  /* These constants are single cycle extu.[bw] instructions.  */
955  if (i == 0xff || i == 0xffff)
956    return 1;
957  /* Constants that can be used in an and immediate instruction is a single
958     cycle, but this requires r0, so make it a little more expensive.  */
959  if (CONST_OK_FOR_L (i))
960    return 2;
961  /* Constants that can be loaded with a mov immediate and an and.
962     This case is probably unnecessary.  */
963  if (CONST_OK_FOR_I (i))
964    return 2;
965  /* Any other constants requires a 2 cycle pc-relative load plus an and.
966     This case is probably unnecessary.  */
967  return 3;
968}
969
970/* Return the cost of a multiply.  */
971int
972multcosts (x)
973     rtx x;
974{
975  if (TARGET_SH2)
976    {
977      /* We have a mul insn, so we can never take more than the mul and the
978	 read of the mac reg, but count more because of the latency and extra
979	 reg usage.  */
980      if (TARGET_SMALLCODE)
981	return 2;
982      return 3;
983    }
984
985  /* If we're aiming at small code, then just count the number of
986     insns in a multiply call sequence.  */
987  if (TARGET_SMALLCODE)
988    return 5;
989
990  /* Otherwise count all the insns in the routine we'd be calling too.  */
991  return 20;
992}
993
994/* Code to expand a shift.  */
995
996void
997gen_ashift (type, n, reg)
998     int type;
999     int n;
1000     rtx reg;
1001{
1002  /* Negative values here come from the shift_amounts array.  */
1003  if (n < 0)
1004    {
1005      if (type == ASHIFT)
1006	type = LSHIFTRT;
1007      else
1008	type = ASHIFT;
1009      n = -n;
1010    }
1011
1012  switch (type)
1013    {
1014    case ASHIFTRT:
1015      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1016      break;
1017    case LSHIFTRT:
1018      if (n == 1)
1019	emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1020      else
1021	emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1022      break;
1023    case ASHIFT:
1024      emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n)));
1025      break;
1026    }
1027}
1028
1029/* Same for HImode */
1030
1031void
1032gen_ashift_hi (type, n, reg)
1033     int type;
1034     int n;
1035     rtx reg;
1036{
1037  /* Negative values here come from the shift_amounts array.  */
1038  if (n < 0)
1039    {
1040      if (type == ASHIFT)
1041	type = LSHIFTRT;
1042      else
1043	type = ASHIFT;
1044      n = -n;
1045    }
1046
1047  switch (type)
1048    {
1049    case ASHIFTRT:
1050    case LSHIFTRT:
1051      /* We don't have HImode right shift operations because using the
1052	 ordinary 32 bit shift instructions for that doesn't generate proper
1053	 zero/sign extension.
1054	 gen_ashift_hi is only called in contexts where we know that the
1055	 sign extension works out correctly.  */
1056      {
1057	int word = 0;
1058	if (GET_CODE (reg) == SUBREG)
1059	  {
1060	    word = SUBREG_WORD (reg);
1061	    reg = SUBREG_REG (reg);
1062	  }
1063	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
1064	break;
1065      }
1066    case ASHIFT:
1067      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1068      break;
1069    }
1070}
1071
1072/* Output RTL to split a constant shift into its component SH constant
1073   shift instructions.  */
1074
1075int
1076gen_shifty_op (code, operands)
1077     int code;
1078     rtx *operands;
1079{
1080  int value = INTVAL (operands[2]);
1081  int max, i;
1082
1083  /* Truncate the shift count in case it is out of bounds.  */
1084  value = value & 0x1f;
1085
1086  if (value == 31)
1087    {
1088      if (code == LSHIFTRT)
1089	{
1090	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1091	  emit_insn (gen_movt (operands[0]));
1092	  return;
1093	}
1094      else if (code == ASHIFT)
1095	{
1096	  /* There is a two instruction sequence for 31 bit left shifts,
1097	     but it requires r0.  */
1098	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1099	    {
1100	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1101	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1102	      return;
1103	    }
1104	}
1105    }
1106  else if (value == 0)
1107    {
1108      /* This can happen when not optimizing.  We must output something here
1109	 to prevent the compiler from aborting in final.c after the try_split
1110	 call.  */
1111      emit_insn (gen_nop ());
1112      return;
1113    }
1114
1115  max = shift_insns[value];
1116  for (i = 0; i < max; i++)
1117    gen_ashift (code, shift_amounts[value][i], operands[0]);
1118}
1119
1120/* Same as above, but optimized for values where the topmost bits don't
1121   matter.  */
1122
1123int
1124gen_shifty_hi_op (code, operands)
1125     int code;
1126     rtx *operands;
1127{
1128  int value = INTVAL (operands[2]);
1129  int max, i;
1130  void (*gen_fun)();
1131
1132  /* This operation is used by and_shl for SImode values with a few
1133     high bits known to be cleared.  */
1134  value &= 31;
1135  if (value == 0)
1136    {
1137      emit_insn (gen_nop ());
1138      return;
1139    }
1140
1141  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1142  if (code == ASHIFT)
1143    {
1144      max = ext_shift_insns[value];
1145      for (i = 0; i < max; i++)
1146	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1147    }
1148  else
1149    /* When shifting right, emit the shifts in reverse order, so that
1150       solitary negative values come first.  */
1151    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1152      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1153}
1154
1155/* Output RTL for an arithmetic right shift.  */
1156
1157/* ??? Rewrite to use super-optimizer sequences.  */
1158
1159int
1160expand_ashiftrt (operands)
1161     rtx *operands;
1162{
1163  rtx wrk;
1164  char func[18];
1165  tree func_name;
1166  int value;
1167
1168  if (TARGET_SH3)
1169    {
1170      if (GET_CODE (operands[2]) != CONST_INT)
1171	{
1172	  rtx count = copy_to_mode_reg (SImode, operands[2]);
1173	  emit_insn (gen_negsi2 (count, count));
1174	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1175	  return 1;
1176	}
1177      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1178	       > 1 + SH_DYNAMIC_SHIFT_COST)
1179	{
1180	  rtx count
1181	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1182	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1183	  return 1;
1184	}
1185    }
1186  if (GET_CODE (operands[2]) != CONST_INT)
1187    return 0;
1188
1189  value = INTVAL (operands[2]) & 31;
1190
1191  if (value == 31)
1192    {
1193      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1194      return 1;
1195    }
1196  else if (value >= 16 && value <= 19)
1197    {
1198      wrk = gen_reg_rtx (SImode);
1199      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1200      value -= 16;
1201      while (value--)
1202	gen_ashift (ASHIFTRT, 1, wrk);
1203      emit_move_insn (operands[0], wrk);
1204      return 1;
1205    }
1206  /* Expand a short sequence inline, longer call a magic routine.  */
1207  else if (value <= 5)
1208    {
1209      wrk = gen_reg_rtx (SImode);
1210      emit_move_insn (wrk, operands[1]);
1211      while (value--)
1212	gen_ashift (ASHIFTRT, 1, wrk);
1213      emit_move_insn (operands[0], wrk);
1214      return 1;
1215    }
1216
1217  wrk = gen_reg_rtx (Pmode);
1218
1219  /* Load the value into an arg reg and call a helper.  */
1220  emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
1221  sprintf (func, "__ashiftrt_r4_%d", value);
1222  func_name = get_identifier (func);
1223  emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode,
1224				IDENTIFIER_POINTER (func_name)));
1225  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1226  emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
1227  return 1;
1228}
1229
1230int sh_dynamicalize_shift_p (count)
1231     rtx count;
1232{
1233  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1234}
1235
1236/* Try to find a good way to implement the combiner pattern
1237  [(set (match_operand:SI 0 "register_operand" "r")
1238        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1239                           (match_operand:SI 2 "const_int_operand" "n"))
1240                (match_operand:SI 3 "const_int_operand" "n"))) .
1241  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1242  return 0 for simple right / left or left/right shift combination.
1243  return 1 for a combination of shifts with zero_extend.
1244  return 2 for a combination of shifts with an AND that needs r0.
1245  return 3 for a combination of shifts with an AND that needs an extra
1246    scratch register, when the three highmost bits of the AND mask are clear.
1247  return 4 for a combination of shifts with an AND that needs an extra
1248    scratch register, when any of the three highmost bits of the AND mask
1249    is set.
1250  If ATTRP is set, store an initial right shift width in ATTRP[0],
1251  and the instruction length in ATTRP[1] .  These values are not valid
1252  when returning 0.
1253  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1254  shift_amounts for the last shift value that is to be used before the
1255  sign extend.  */
1256int
1257shl_and_kind (left_rtx, mask_rtx, attrp)
1258     rtx left_rtx, mask_rtx;
1259     int *attrp;
1260{
1261  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1262  int left = INTVAL (left_rtx), right;
1263  int best = 0;
1264  int cost, best_cost = 10000;
1265  int best_right = 0, best_len = 0;
1266  int i;
1267  int can_ext;
1268
1269  if (left < 0 || left > 31)
1270    return 0;
1271  if (GET_CODE (mask_rtx) == CONST_INT)
1272    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1273  else
1274    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1275  /* Can this be expressed as a right shift / left shift pair ? */
1276  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1277  right = exact_log2 (lsb);
1278  mask2 = ~(mask + lsb - 1);
1279  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1280  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1281  if (! mask2)
1282    best_cost = shift_insns[right] + shift_insns[right + left];
1283  /* mask has no trailing zeroes <==> ! right */
1284  else if (! right && mask2 == ~(lsb2 - 1))
1285    {
1286      int late_right = exact_log2 (lsb2);
1287      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1288    }
1289  /* Try to use zero extend */
1290  if (mask2 == ~(lsb2 - 1))
1291    {
1292      int width, first;
1293
1294      for (width = 8; width <= 16; width += 8)
1295	{
1296	  /* Can we zero-extend right away? */
1297	  if (lsb2 == (HOST_WIDE_INT)1 << width)
1298	    {
1299	      cost
1300		= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1301	      if (cost < best_cost)
1302		{
1303		  best = 1;
1304		  best_cost = cost;
1305		  best_right = right;
1306		  best_len = cost;
1307		  if (attrp)
1308		    attrp[2] = -1;
1309		}
1310	      continue;
1311	    }
1312	  /* ??? Could try to put zero extend into initial right shift,
1313	     or even shift a bit left before the right shift. */
1314	  /* Determine value of first part of left shift, to get to the
1315	     zero extend cut-off point.  */
1316	  first = width - exact_log2 (lsb2) + right;
1317	  if (first >= 0 && right + left - first >= 0)
1318	    {
1319	      cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1320		+ ext_shift_insns[right + left - first];
1321	      if (cost < best_cost)
1322		{
1323		  best = 1;
1324		  best_cost = cost;
1325		  best_right = right;
1326		  best_len = cost;
1327		  if (attrp)
1328		    attrp[2] = first;
1329		  }
1330	    }
1331	}
1332    }
1333  /* Try to use r0 AND pattern */
1334  for (i = 0; i <= 2; i++)
1335    {
1336      if (i > right)
1337	break;
1338      if (! CONST_OK_FOR_L (mask >> i))
1339	continue;
1340      cost = (i != 0) + 2 + ext_shift_insns[left + i];
1341      if (cost < best_cost)
1342	{
1343	  best = 2;
1344	  best_cost = cost;
1345	  best_right = i;
1346	  best_len = cost - 1;
1347	}
1348    }
1349  /* Try to use a scratch register to hold the AND operand.  */
1350  can_ext = ((mask << left) & 0xe0000000) == 0;
1351  for (i = 0; i <= 2; i++)
1352    {
1353      if (i > right)
1354	break;
1355      cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1356	+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
1357      if (cost < best_cost)
1358	{
1359	  best = 4 - can_ext;
1360	  best_cost = cost;
1361	  best_right = i;
1362	  best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1363	}
1364    }
1365
1366  if (attrp)
1367    {
1368      attrp[0] = best_right;
1369      attrp[1] = best_len;
1370    }
1371  return best;
1372}
1373
1374/* This is used in length attributes of the unnamed instructions
1375   corresponding to shl_and_kind return values of 1 and 2.  */
1376int
1377shl_and_length (insn)
1378     rtx insn;
1379{
1380  rtx set_src, left_rtx, mask_rtx;
1381  int attributes[3];
1382
1383  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1384  left_rtx = XEXP (XEXP (set_src, 0), 1);
1385  mask_rtx = XEXP (set_src, 1);
1386  shl_and_kind (left_rtx, mask_rtx, attributes);
1387  return attributes[1];
1388}
1389
1390/* This is used in length attribute of the and_shl_scratch instruction.  */
1391
1392int
1393shl_and_scr_length (insn)
1394     rtx insn;
1395{
1396  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1397  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1398  rtx op = XEXP (set_src, 0);
1399  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1400  op = XEXP (XEXP (op, 0), 0);
1401  return len + shift_insns[INTVAL (XEXP (op, 1))];
1402}
1403
1404/* Generating rtl? */
1405extern int rtx_equal_function_value_matters;
1406
1407/* Generate rtl for instructions for which shl_and_kind advised a particular
1408   method of generating them, i.e. returned zero.  */
1409
1410int
1411gen_shl_and (dest, left_rtx, mask_rtx, source)
1412     rtx dest, left_rtx, mask_rtx, source;
1413{
1414  int attributes[3];
1415  unsigned HOST_WIDE_INT mask;
1416  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1417  int right, total_shift;
1418  int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op;
1419
1420  right = attributes[0];
1421  total_shift = INTVAL (left_rtx) + right;
1422  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1423  switch (kind)
1424    {
1425    default:
1426      return -1;
1427    case 1:
1428      {
1429	int first = attributes[2];
1430	rtx operands[3];
1431
1432	if (first < 0)
1433	  {
1434	    emit_insn ((mask << right) <= 0xff
1435		       ? gen_zero_extendqisi2(dest,
1436					      gen_lowpart (QImode, source))
1437		       : gen_zero_extendhisi2(dest,
1438					      gen_lowpart (HImode, source)));
1439	    source = dest;
1440	  }
1441	if (source != dest)
1442	  emit_insn (gen_movsi (dest, source));
1443	operands[0] = dest;
1444	if (right)
1445	  {
1446	    operands[2] = GEN_INT (right);
1447	    gen_shifty_hi_op (LSHIFTRT, operands);
1448	  }
1449	if (first > 0)
1450	  {
1451	    operands[2] = GEN_INT (first);
1452	    gen_shifty_hi_op (ASHIFT, operands);
1453	    total_shift -= first;
1454	    mask <<= first;
1455	  }
1456	if (first >= 0)
1457	  emit_insn (mask <= 0xff
1458		     ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1459		     : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1460	if (total_shift > 0)
1461	  {
1462	    operands[2] = GEN_INT (total_shift);
1463	    gen_shifty_hi_op (ASHIFT, operands);
1464	  }
1465	break;
1466      }
1467    case 4:
1468      shift_gen_fun = gen_shifty_op;
1469    case 3:
1470      /* If the topmost bit that matters is set, set the topmost bits
1471	 that don't matter.  This way, we might be able to get a shorter
1472	 signed constant.  */
1473      if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift))
1474	mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1475    case 2:
1476      /* Don't expand fine-grained when combining, because that will
1477         make the pattern fail.  */
1478      if (rtx_equal_function_value_matters
1479	  || reload_in_progress || reload_completed)
1480	{
1481	  rtx operands[3];
1482
1483	  /* Cases 3 and 4 should be handled by this split
1484	     only while combining  */
1485	  if (kind > 2)
1486	    abort ();
1487	  if (right)
1488	    {
1489	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1490	      source = dest;
1491	    }
1492	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1493	  if (total_shift)
1494	    {
1495	      operands[0] = dest;
1496	      operands[1] = dest;
1497	      operands[2] = GEN_INT (total_shift);
1498	      shift_gen_fun (ASHIFT, operands);
1499	    }
1500	  break;
1501	}
1502      else
1503	{
1504	  int neg = 0;
1505	  if (kind != 4 && total_shift < 16)
1506	    {
1507	      neg = -ext_shift_amounts[total_shift][1];
1508	      if (neg > 0)
1509		neg -= ext_shift_amounts[total_shift][2];
1510	      else
1511		neg = 0;
1512	    }
1513	  emit_insn (gen_and_shl_scratch (dest, source,
1514					  GEN_INT (right),
1515					  GEN_INT (mask),
1516					  GEN_INT (total_shift + neg),
1517					  GEN_INT (neg)));
1518	  emit_insn (gen_movsi (dest, dest));
1519	  break;
1520	}
1521    }
1522  return 0;
1523}
1524
1525/* Try to find a good way to implement the combiner pattern
1526  [(set (match_operand:SI 0 "register_operand" "=r")
1527        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1528                                    (match_operand:SI 2 "const_int_operand" "n")
1529                         (match_operand:SI 3 "const_int_operand" "n")
1530                         (const_int 0)))
1531   (clobber (reg:SI 18))]
1532  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1533  return 0 for simple left / right shift combination.
1534  return 1 for left shift / 8 bit sign extend / left shift.
1535  return 2 for left shift / 16 bit sign extend / left shift.
1536  return 3 for left shift / 8 bit sign extend / shift / sign extend.
1537  return 4 for left shift / 16 bit sign extend / shift / sign extend.
1538  return 5 for left shift / 16 bit sign extend / right shift
1539  return 6 for < 8 bit sign extend / left shift.
1540  return 7 for < 8 bit sign extend / left shift / single right shift.
1541  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
1542
1543int
1544shl_sext_kind (left_rtx, size_rtx, costp)
1545     rtx left_rtx, size_rtx;
1546     int *costp;
1547{
1548  int left, size, insize, ext;
1549  int cost, best_cost;
1550  int kind;
1551
1552  left = INTVAL (left_rtx);
1553  size = INTVAL (size_rtx);
1554  insize = size - left;
1555  if (insize <= 0)
1556    abort ();
1557  /* Default to left / right shift.  */
1558  kind = 0;
1559  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1560  if (size <= 16)
1561    {
1562      /* 16 bit shift / sign extend / 16 bit shift */
1563      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1564      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1565	 below, by alternative 3 or something even better.  */
1566      if (cost < best_cost)
1567	{
1568	  kind = 5;
1569	  best_cost = cost;
1570	}
1571    }
1572  /* Try a plain sign extend between two shifts.  */
1573  for (ext = 16; ext >= insize; ext -= 8)
1574    {
1575      if (ext <= size)
1576	{
1577	  cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1578	  if (cost < best_cost)
1579	    {
1580	      kind = ext / 8U;
1581	      best_cost = cost;
1582	    }
1583	}
1584      /* Check if we can do a sloppy shift with a final signed shift
1585	 restoring the sign.  */
1586      if (EXT_SHIFT_SIGNED (size - ext))
1587	cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1588      /* If not, maybe it's still cheaper to do the second shift sloppy,
1589	 and do a final sign extend?  */
1590      else if (size <= 16)
1591	cost = ext_shift_insns[ext - insize] + 1
1592	  + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1593      else
1594	continue;
1595      if (cost < best_cost)
1596	{
1597	  kind = ext / 8U + 2;
1598	  best_cost = cost;
1599	}
1600    }
1601  /* Check if we can sign extend in r0 */
1602  if (insize < 8)
1603    {
1604      cost = 3 + shift_insns[left];
1605      if (cost < best_cost)
1606	{
1607	  kind = 6;
1608	  best_cost = cost;
1609	}
1610      /* Try the same with a final signed shift.  */
1611      if (left < 31)
1612	{
1613	  cost = 3 + ext_shift_insns[left + 1] + 1;
1614	  if (cost < best_cost)
1615	    {
1616	      kind = 7;
1617	      best_cost = cost;
1618	    }
1619	}
1620    }
1621  if (TARGET_SH3)
1622    {
1623      /* Try to use a dynamic shift.  */
1624      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1625      if (cost < best_cost)
1626	{
1627	  kind = 0;
1628	  best_cost = cost;
1629	}
1630    }
1631  if (costp)
1632    *costp = cost;
1633  return kind;
1634}
1635
1636/* Function to be used in the length attribute of the instructions
1637   implementing this pattern.  */
1638
1639int
1640shl_sext_length (insn)
1641     rtx insn;
1642{
1643  rtx set_src, left_rtx, size_rtx;
1644  int cost;
1645
1646  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1647  left_rtx = XEXP (XEXP (set_src, 0), 1);
1648  size_rtx = XEXP (set_src, 1);
1649  shl_sext_kind (left_rtx, size_rtx, &cost);
1650  return cost;
1651}
1652
1653/* Generate rtl for this pattern */
1654
1655int
1656gen_shl_sext (dest, left_rtx, size_rtx, source)
1657     rtx dest, left_rtx, size_rtx, source;
1658{
1659  int kind;
1660  int left, size, insize, cost;
1661  rtx operands[3];
1662
1663  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1664  left = INTVAL (left_rtx);
1665  size = INTVAL (size_rtx);
1666  insize = size - left;
1667  switch (kind)
1668    {
1669    case 1:
1670    case 2:
1671    case 3:
1672    case 4:
1673      {
1674	int ext = kind & 1 ? 8 : 16;
1675	int shift2 = size - ext;
1676
1677	/* Don't expand fine-grained when combining, because that will
1678	   make the pattern fail.  */
1679	if (! rtx_equal_function_value_matters
1680	    && ! reload_in_progress && ! reload_completed)
1681	  {
1682	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1683	    emit_insn (gen_movsi (dest, source));
1684	    break;
1685	  }
1686	if (dest != source)
1687	  emit_insn (gen_movsi (dest, source));
1688	operands[0] = dest;
1689	if (ext - insize)
1690	  {
1691	    operands[2] = GEN_INT (ext - insize);
1692	    gen_shifty_hi_op (ASHIFT, operands);
1693	  }
1694	emit_insn (kind & 1
1695		   ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1696		   : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1697	if (kind <= 2)
1698	  {
1699	    if (shift2)
1700	      {
1701		operands[2] = GEN_INT (shift2);
1702		gen_shifty_op (ASHIFT, operands);
1703	      }
1704	  }
1705	else
1706	  {
1707	    if (shift2 > 0)
1708	      {
1709		if (EXT_SHIFT_SIGNED (shift2))
1710		  {
1711		    operands[2] = GEN_INT (shift2 + 1);
1712		    gen_shifty_op (ASHIFT, operands);
1713		    operands[2] = GEN_INT (1);
1714		    gen_shifty_op (ASHIFTRT, operands);
1715		    break;
1716		  }
1717		operands[2] = GEN_INT (shift2);
1718		gen_shifty_hi_op (ASHIFT, operands);
1719	      }
1720	    else if (shift2)
1721	      {
1722		operands[2] = GEN_INT (-shift2);
1723		gen_shifty_hi_op (LSHIFTRT, operands);
1724	      }
1725	    emit_insn (size <= 8
1726		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1727		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1728	  }
1729	break;
1730      }
1731    case 5:
1732      {
1733	int i = 16 - size;
1734	if (! rtx_equal_function_value_matters
1735	    && ! reload_in_progress && ! reload_completed)
1736	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1737	else
1738	  {
1739	    operands[0] = dest;
1740	    operands[2] = GEN_INT (16 - insize);
1741	    gen_shifty_hi_op (ASHIFT, operands);
1742	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1743	  }
1744	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
1745	while (--i >= 0)
1746	  gen_ashift (ASHIFTRT, 1, dest);
1747	break;
1748      }
1749    case 6:
1750    case 7:
1751      /* Don't expand fine-grained when combining, because that will
1752	 make the pattern fail.  */
1753      if (! rtx_equal_function_value_matters
1754	  && ! reload_in_progress && ! reload_completed)
1755	{
1756	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1757	  emit_insn (gen_movsi (dest, source));
1758	  break;
1759	}
1760      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1761      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1762      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
1763      operands[0] = dest;
1764      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1765      gen_shifty_op (ASHIFT, operands);
1766      if (kind == 7)
1767	emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1768      break;
1769    default:
1770      return -1;
1771    }
1772  return 0;
1773}
1774
1775/* The SH cannot load a large constant into a register, constants have to
1776   come from a pc relative load.  The reference of a pc relative load
1777   instruction must be less than 1k infront of the instruction.  This
1778   means that we often have to dump a constant inside a function, and
1779   generate code to branch around it.
1780
1781   It is important to minimize this, since the branches will slow things
1782   down and make things bigger.
1783
1784   Worst case code looks like:
1785
1786   mov.l L1,rn
1787   bra   L2
1788   nop
1789   align
1790   L1:   .long value
1791   L2:
1792   ..
1793
1794   mov.l L3,rn
1795   bra   L4
1796   nop
1797   align
1798   L3:   .long value
1799   L4:
1800   ..
1801
1802   We fix this by performing a scan before scheduling, which notices which
1803   instructions need to have their operands fetched from the constant table
1804   and builds the table.
1805
1806   The algorithm is:
1807
1808   scan, find an instruction which needs a pcrel move.  Look forward, find the
1809   last barrier which is within MAX_COUNT bytes of the requirement.
1810   If there isn't one, make one.  Process all the instructions between
1811   the find and the barrier.
1812
1813   In the above example, we can tell that L3 is within 1k of L1, so
1814   the first move can be shrunk from the 3 insn+constant sequence into
1815   just 1 insn, and the constant moved to L3 to make:
1816
1817   mov.l        L1,rn
1818   ..
1819   mov.l        L3,rn
1820   bra          L4
1821   nop
1822   align
1823   L3:.long value
1824   L4:.long value
1825
1826   Then the second move becomes the target for the shortening process.  */
1827
1828typedef struct
1829{
1830  rtx value;			/* Value in table.  */
1831  rtx label;			/* Label of value.  */
1832  enum machine_mode mode;	/* Mode of value.  */
1833} pool_node;
1834
1835/* The maximum number of constants that can fit into one pool, since
1836   the pc relative range is 0...1020 bytes and constants are at least 4
1837   bytes long.  */
1838
1839#define MAX_POOL_SIZE (1020/4)
1840static pool_node pool_vector[MAX_POOL_SIZE];
1841static int pool_size;
1842
1843/* ??? If we need a constant in HImode which is the truncated value of a
1844   constant we need in SImode, we could combine the two entries thus saving
1845   two bytes.  Is this common enough to be worth the effort of implementing
1846   it?  */
1847
1848/* ??? This stuff should be done at the same time that we shorten branches.
1849   As it is now, we must assume that all branches are the maximum size, and
1850   this causes us to almost always output constant pools sooner than
1851   necessary.  */
1852
1853/* Add a constant to the pool and return its label.  */
1854
1855static rtx
1856add_constant (x, mode, last_value)
1857     rtx last_value;
1858     rtx x;
1859     enum machine_mode mode;
1860{
1861  int i;
1862  rtx lab;
1863
1864  /* First see if we've already got it.  */
1865  for (i = 0; i < pool_size; i++)
1866    {
1867      if (x->code == pool_vector[i].value->code
1868	  && mode == pool_vector[i].mode)
1869	{
1870	  if (x->code == CODE_LABEL)
1871	    {
1872	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1873		continue;
1874	    }
1875	  if (rtx_equal_p (x, pool_vector[i].value))
1876	    {
1877	      lab = 0;
1878	      if (! last_value
1879		  || ! i
1880		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
1881		{
1882		  lab = pool_vector[i].label;
1883		  if (! lab)
1884		    pool_vector[i].label = lab = gen_label_rtx ();
1885		}
1886	      return lab;
1887	    }
1888	}
1889    }
1890
1891  /* Need a new one.  */
1892  pool_vector[pool_size].value = x;
1893  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
1894    lab = 0;
1895  else
1896    lab = gen_label_rtx ();
1897  pool_vector[pool_size].mode = mode;
1898  pool_vector[pool_size].label = lab;
1899  pool_size++;
1900  return lab;
1901}
1902
1903/* Output the literal table.  */
1904
1905static void
1906dump_table (scan)
1907     rtx scan;
1908{
1909  int i;
1910  int need_align = 1;
1911
1912  /* Do two passes, first time dump out the HI sized constants.  */
1913
1914  for (i = 0; i < pool_size; i++)
1915    {
1916      pool_node *p = &pool_vector[i];
1917
1918      if (p->mode == HImode)
1919	{
1920	  if (need_align)
1921	    {
1922	      scan = emit_insn_after (gen_align_2 (), scan);
1923	      need_align = 0;
1924	    }
1925	  scan = emit_label_after (p->label, scan);
1926	  scan = emit_insn_after (gen_consttable_2 (p->value), scan);
1927	}
1928    }
1929
1930  need_align = 1;
1931
1932  for (i = 0; i < pool_size; i++)
1933    {
1934      pool_node *p = &pool_vector[i];
1935
1936      switch (p->mode)
1937	{
1938	case HImode:
1939	  break;
1940	case SImode:
1941	case SFmode:
1942	  if (need_align)
1943	    {
1944	      need_align = 0;
1945	      scan = emit_label_after (gen_label_rtx (), scan);
1946	      scan = emit_insn_after (gen_align_4 (), scan);
1947	    }
1948	  if (p->label)
1949	    scan = emit_label_after (p->label, scan);
1950	  scan = emit_insn_after (gen_consttable_4 (p->value), scan);
1951	  break;
1952	case DFmode:
1953	case DImode:
1954	  if (need_align)
1955	    {
1956	      need_align = 0;
1957	      scan = emit_label_after (gen_label_rtx (), scan);
1958	      scan = emit_insn_after (gen_align_4 (), scan);
1959	    }
1960	  if (p->label)
1961	    scan = emit_label_after (p->label, scan);
1962	  scan = emit_insn_after (gen_consttable_8 (p->value), scan);
1963	  break;
1964	default:
1965	  abort ();
1966	  break;
1967	}
1968    }
1969
1970  scan = emit_insn_after (gen_consttable_end (), scan);
1971  scan = emit_barrier_after (scan);
1972  pool_size = 0;
1973}
1974
1975/* Return non-zero if constant would be an ok source for a
1976   mov.w instead of a mov.l.  */
1977
1978static int
1979hi_const (src)
1980     rtx src;
1981{
1982  return (GET_CODE (src) == CONST_INT
1983	  && INTVAL (src) >= -32768
1984	  && INTVAL (src) <= 32767);
1985}
1986
1987/* Non-zero if the insn is a move instruction which needs to be fixed.  */
1988
1989/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
1990   CONST_DOUBLE input value is CONST_OK_FOR_I.  For a SFmode move, we don't
1991   need to fix it if the input value is CONST_OK_FOR_I.  */
1992
1993static int
1994broken_move (insn)
1995     rtx insn;
1996{
1997  if (GET_CODE (insn) == INSN)
1998    {
1999      rtx pat = PATTERN (insn);
2000      if (GET_CODE (pat) == PARALLEL)
2001	pat = XVECEXP (pat, 0, 0);
2002      if (GET_CODE (pat) == SET
2003	  /* We can load any 8 bit value if we don't care what the high
2004	     order bits end up as.  */
2005	  && GET_MODE (SET_DEST (pat)) != QImode
2006	  && CONSTANT_P (SET_SRC (pat))
2007	  && ! (TARGET_SH3E
2008		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2009		&& (fp_zero_operand (SET_SRC (pat))
2010		    || fp_one_operand (SET_SRC (pat)))
2011		&& GET_CODE (SET_DEST (pat)) == REG
2012		&& REGNO (SET_DEST (pat)) >= FIRST_FP_REG
2013		&& REGNO (SET_DEST (pat)) <= LAST_FP_REG)
2014	  && (GET_CODE (SET_SRC (pat)) != CONST_INT
2015	      || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2016	return 1;
2017    }
2018
2019  return 0;
2020}
2021
2022static int
2023mova_p (insn)
2024     rtx insn;
2025{
2026  return (GET_CODE (insn) == INSN
2027	  && GET_CODE (PATTERN (insn)) == SET
2028	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2029	  && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
2030}
2031
2032/* Find the last barrier from insn FROM which is close enough to hold the
2033   constant pool.  If we can't find one, then create one near the end of
2034   the range.  */
2035
2036static rtx
2037find_barrier (num_mova, mova, from)
2038     int num_mova;
2039     rtx mova, from;
2040{
2041  int count_si = 0;
2042  int count_hi = 0;
2043  int found_hi = 0;
2044  int found_si = 0;
2045  int hi_align = 2;
2046  int si_align = 2;
2047  int leading_mova = num_mova;
2048  rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2049  int si_limit;
2050  int hi_limit;
2051
2052  /* For HImode: range is 510, add 4 because pc counts from address of
2053     second instruction after this one, subtract 2 for the jump instruction
2054     that we may need to emit before the table, subtract 2 for the instruction
2055     that fills the jump delay slot (in very rare cases, reorg will take an
2056     instruction from after the constant pool or will leave the delay slot
2057     empty).  This gives 510.
2058     For SImode: range is 1020, add 4 because pc counts from address of
2059     second instruction after this one, subtract 2 in case pc is 2 byte
2060     aligned, subtract 2 for the jump instruction that we may need to emit
2061     before the table, subtract 2 for the instruction that fills the jump
2062     delay slot.  This gives 1018.  */
2063
2064  /* The branch will always be shortened now that the reference address for
2065     forward branches is the successor address, thus we need no longer make
2066     adjustments to the [sh]i_limit for -O0.  */
2067
2068  si_limit = 1018;
2069  hi_limit = 510;
2070
2071  while (from && count_si < si_limit && count_hi < hi_limit)
2072    {
2073      int inc = get_attr_length (from);
2074      int new_align = 1;
2075
2076      if (GET_CODE (from) == CODE_LABEL)
2077	{
2078	  if (optimize)
2079	    new_align = 1 << label_to_alignment (from);
2080	  else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2081	    new_align = 1 << barrier_align (from);
2082	  else
2083	    new_align = 1;
2084	  inc = 0;
2085	}
2086
2087      if (GET_CODE (from) == BARRIER)
2088	{
2089
2090	  found_barrier = from;
2091
2092	  /* If we are at the end of the function, or in front of an alignment
2093	     instruction, we need not insert an extra alignment.  We prefer
2094	     this kind of barrier.  */
2095	  if (barrier_align (from) > 2)
2096	    good_barrier = from;
2097	}
2098
2099      if (broken_move (from))
2100	{
2101	  rtx pat, src, dst;
2102	  enum machine_mode mode;
2103
2104	  pat = PATTERN (from);
2105	  if (GET_CODE (pat) == PARALLEL)
2106	    pat = XVECEXP (pat, 0, 0);
2107	  src = SET_SRC (pat);
2108	  dst = SET_DEST (pat);
2109	  mode = GET_MODE (dst);
2110
2111	  /* We must explicitly check the mode, because sometimes the
2112	     front end will generate code to load unsigned constants into
2113	     HImode targets without properly sign extending them.  */
2114	  if (mode == HImode
2115	      || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2116	    {
2117	      found_hi += 2;
2118	      /* We put the short constants before the long constants, so
2119		 we must count the length of short constants in the range
2120		 for the long constants.  */
2121	      /* ??? This isn't optimal, but is easy to do.  */
2122	      si_limit -= 2;
2123	    }
2124	  else
2125	    {
2126	      while (si_align > 2 && found_si + si_align - 2 > count_si)
2127		si_align >>= 1;
2128	      if (found_si > count_si)
2129		count_si = found_si;
2130	      found_si += GET_MODE_SIZE (mode);
2131	      if (num_mova)
2132		si_limit -= GET_MODE_SIZE (mode);
2133	    }
2134	}
2135
2136      if (mova_p (from))
2137	{
2138	  if (! num_mova++)
2139	    {
2140	      leading_mova = 0;
2141	      mova = from;
2142	      barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2143	    }
2144	  if (found_si > count_si)
2145	    count_si = found_si;
2146	}
2147      else if (GET_CODE (from) == JUMP_INSN
2148	       && (GET_CODE (PATTERN (from)) == ADDR_VEC
2149		   || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2150	{
2151	  if (num_mova)
2152	    num_mova--;
2153	  if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2154	    {
2155	      /* We have just passed the barrier in front of the
2156		 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
2157		 the ADDR_DIFF_VEC is accessed as data, just like our pool
2158		 constants, this is a good opportunity to accommodate what
2159		 we have gathered so far.
2160		 If we waited any longer, we could end up at a barrier in
2161		 front of code, which gives worse cache usage for separated
2162		 instruction / data caches.  */
2163	      good_barrier = found_barrier;
2164	      break;
2165	    }
2166	  else
2167	    {
2168	      rtx body = PATTERN (from);
2169	      inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2170	    }
2171	}
2172
2173      if (found_si)
2174	{
2175	  if (new_align > si_align)
2176	    {
2177	      si_limit -= count_si - 1 & new_align - si_align;
2178	      si_align = new_align;
2179	    }
2180	  count_si = count_si + new_align - 1 & -new_align;
2181	  count_si += inc;
2182	}
2183      if (found_hi)
2184	{
2185	  if (new_align > hi_align)
2186	    {
2187	      hi_limit -= count_hi - 1 & new_align - hi_align;
2188	      hi_align = new_align;
2189	    }
2190	  count_hi = count_hi + new_align - 1 & -new_align;
2191	  count_hi += inc;
2192	}
2193      from = NEXT_INSN (from);
2194    }
2195
2196  if (num_mova)
2197    if (leading_mova)
2198      {
2199	/* Try as we might, the leading mova is out of range.  Change
2200	   it into a load (which will become a pcload) and retry.  */
2201	SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2202	INSN_CODE (mova) = -1;
2203        return find_barrier (0, 0, mova);
2204      }
2205    else
2206      {
2207	/* Insert the constant pool table before the mova instruction,
2208	   to prevent the mova label reference from going out of range.  */
2209	from = mova;
2210	good_barrier = found_barrier = barrier_before_mova;
2211      }
2212
2213  if (found_barrier)
2214    {
2215      if (good_barrier && next_real_insn (found_barrier))
2216	found_barrier = good_barrier;
2217    }
2218  else
2219    {
2220      /* We didn't find a barrier in time to dump our stuff,
2221	 so we'll make one.  */
2222      rtx label = gen_label_rtx ();
2223
2224      /* If we exceeded the range, then we must back up over the last
2225	 instruction we looked at.  Otherwise, we just need to undo the
2226	 NEXT_INSN at the end of the loop.  */
2227      if (count_hi > hi_limit || count_si > si_limit)
2228	from = PREV_INSN (PREV_INSN (from));
2229      else
2230	from = PREV_INSN (from);
2231
2232      /* Walk back to be just before any jump or label.
2233	 Putting it before a label reduces the number of times the branch
2234	 around the constant pool table will be hit.  Putting it before
2235	 a jump makes it more likely that the bra delay slot will be
2236	 filled.  */
2237      while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2238	     || GET_CODE (from) == CODE_LABEL)
2239	from = PREV_INSN (from);
2240
2241      from = emit_jump_insn_after (gen_jump (label), from);
2242      JUMP_LABEL (from) = label;
2243      LABEL_NUSES (label) = 1;
2244      found_barrier = emit_barrier_after (from);
2245      emit_label_after (label, found_barrier);
2246    }
2247
2248  return found_barrier;
2249}
2250
2251/* If the instruction INSN is implemented by a special function, and we can
2252   positively find the register that is used to call the sfunc, and this
2253   register is not used anywhere else in this instruction - except as the
2254   destination of a set, return this register; else, return 0.  */
2255rtx
2256sfunc_uses_reg (insn)
2257     rtx insn;
2258{
2259  int i;
2260  rtx pattern, part, reg_part, reg;
2261
2262  if (GET_CODE (insn) != INSN)
2263    return 0;
2264  pattern = PATTERN (insn);
2265  if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2266    return 0;
2267
2268  for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2269    {
2270      part = XVECEXP (pattern, 0, i);
2271      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2272	reg_part = part;
2273    }
2274  if (! reg_part)
2275    return 0;
2276  reg = XEXP (reg_part, 0);
2277  for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2278    {
2279      part = XVECEXP (pattern, 0, i);
2280      if (part == reg_part || GET_CODE (part) == CLOBBER)
2281	continue;
2282      if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2283				  && GET_CODE (SET_DEST (part)) == REG)
2284				 ? SET_SRC (part) : part)))
2285	return 0;
2286    }
2287  return reg;
2288}
2289
2290/* See if the only way in which INSN uses REG is by calling it, or by
2291   setting it while calling it.  Set *SET to a SET rtx if the register
2292   is set by INSN.  */
2293
2294static int
2295noncall_uses_reg (reg, insn, set)
2296     rtx reg;
2297     rtx insn;
2298     rtx *set;
2299{
2300  rtx pattern, reg2;
2301
2302  *set = NULL_RTX;
2303
2304  reg2 = sfunc_uses_reg (insn);
2305  if (reg2 && REGNO (reg2) == REGNO (reg))
2306    {
2307      pattern = single_set (insn);
2308      if (pattern
2309	  && GET_CODE (SET_DEST (pattern)) == REG
2310	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
2311	*set = pattern;
2312      return 0;
2313    }
2314  if (GET_CODE (insn) != CALL_INSN)
2315    {
2316      /* We don't use rtx_equal_p because we don't care if the mode is
2317	 different.  */
2318      pattern = single_set (insn);
2319      if (pattern
2320	  && GET_CODE (SET_DEST (pattern)) == REG
2321	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
2322	{
2323	  rtx par, part;
2324	  int i;
2325
2326	  *set = pattern;
2327	  par = PATTERN (insn);
2328	  if (GET_CODE (par) == PARALLEL)
2329	    for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2330	      {
2331		part = XVECEXP (par, 0, i);
2332		if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2333		  return 1;
2334	      }
2335	  return reg_mentioned_p (reg, SET_SRC (pattern));
2336	}
2337
2338      return 1;
2339    }
2340
2341  pattern = PATTERN (insn);
2342
2343  if (GET_CODE (pattern) == PARALLEL)
2344    {
2345      int i;
2346
2347      for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2348	if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2349	  return 1;
2350      pattern = XVECEXP (pattern, 0, 0);
2351    }
2352
2353  if (GET_CODE (pattern) == SET)
2354    {
2355      if (reg_mentioned_p (reg, SET_DEST (pattern)))
2356	{
2357	  /* We don't use rtx_equal_p, because we don't care if the
2358             mode is different.  */
2359	  if (GET_CODE (SET_DEST (pattern)) != REG
2360	      || REGNO (reg) != REGNO (SET_DEST (pattern)))
2361	    return 1;
2362
2363	  *set = pattern;
2364	}
2365
2366      pattern = SET_SRC (pattern);
2367    }
2368
2369  if (GET_CODE (pattern) != CALL
2370      || GET_CODE (XEXP (pattern, 0)) != MEM
2371      || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2372    return 1;
2373
2374  return 0;
2375}
2376
2377/* Given a X, a pattern of an insn or a part of it, return a mask of used
2378   general registers.  Bits 0..15 mean that the respective registers
2379   are used as inputs in the instruction.  Bits 16..31 mean that the
2380   registers 0..15, respectively, are used as outputs, or are clobbered.
2381   IS_DEST should be set to 16 if X is the destination of a SET, else to 0.  */
2382int
2383regs_used (x, is_dest)
2384     rtx x; int is_dest;
2385{
2386  enum rtx_code code;
2387  char *fmt;
2388  int i, used = 0;
2389
2390  if (! x)
2391    return used;
2392  code = GET_CODE (x);
2393  switch (code)
2394    {
2395    case REG:
2396      if (REGNO (x) < 16)
2397	return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2398		<< (REGNO (x) + is_dest));
2399      return 0;
2400    case SUBREG:
2401      {
2402	rtx y = SUBREG_REG (x);
2403
2404	if (GET_CODE (y) != REG)
2405	  break;
2406	if (REGNO (y) < 16)
2407	  return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2408		  << (REGNO (y) + SUBREG_WORD (x) + is_dest));
2409	return 0;
2410      }
2411    case SET:
2412      return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2413    case RETURN:
2414      /* If there was a return value, it must have been indicated with USE.  */
2415      return 0x00ffff00;
2416    case CLOBBER:
2417      is_dest = 1;
2418      break;
2419    case MEM:
2420      is_dest = 0;
2421      break;
2422    case CALL:
2423      used |= 0x00ff00f0;
2424      break;
2425    }
2426
2427  fmt = GET_RTX_FORMAT (code);
2428
2429  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2430    {
2431      if (fmt[i] == 'E')
2432	{
2433	  register int j;
2434	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2435	    used |= regs_used (XVECEXP (x, i, j), is_dest);
2436	}
2437      else if (fmt[i] == 'e')
2438	used |= regs_used (XEXP (x, i), is_dest);
2439    }
2440  return used;
2441}
2442
2443/* Create an instruction that prevents redirection of a conditional branch
2444   to the destination of the JUMP with address ADDR.
2445   If the branch needs to be implemented as an indirect jump, try to find
2446   a scratch register for it.
2447   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2448   If any preceding insn that doesn't fit into a delay slot is good enough,
2449   pass 1.  Pass 2 if a definite blocking insn is needed.
2450   -1 is used internally to avoid deep recursion.
2451   If a blocking instruction is made or recognized, return it.  */
2452
2453static rtx
2454gen_block_redirect (jump, addr, need_block)
2455     rtx jump;
2456     int addr, need_block;
2457{
2458  int dead = 0;
2459  rtx prev = prev_nonnote_insn (jump);
2460  rtx dest;
2461
2462  /* First, check if we already have an instruction that satisfies our need.  */
2463  if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2464    {
2465      if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2466	return prev;
2467      if (GET_CODE (PATTERN (prev)) == USE
2468	  || GET_CODE (PATTERN (prev)) == CLOBBER
2469	  || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2470	prev = jump;
2471      else if ((need_block &= ~1) < 0)
2472	return prev;
2473      else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2474	need_block = 0;
2475    }
2476  /* We can't use JUMP_LABEL here because it might be undefined
2477     when not optimizing.  */
2478  dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2479  /* If the branch is out of range, try to find a scratch register for it.  */
2480  if (optimize
2481      && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
2482    {
2483      rtx scan;
2484      /* Don't look for the stack pointer as a scratch register,
2485	 it would cause trouble if an interrupt occurred.  */
2486      unsigned try = 0x7fff, used;
2487      int jump_left = flag_expensive_optimizations + 1;
2488
2489      /* It is likely that the most recent eligible instruction is wanted for
2490	 the delay slot.  Therefore, find out which registers it uses, and
2491	 try to avoid using them.  */
2492
2493      for (scan = jump; scan = PREV_INSN (scan); )
2494	{
2495	  enum rtx_code code;
2496
2497	  if (INSN_DELETED_P (scan))
2498	    continue;
2499	  code = GET_CODE (scan);
2500	  if (code == CODE_LABEL || code == JUMP_INSN)
2501	    break;
2502	  if (code == INSN
2503	      && GET_CODE (PATTERN (scan)) != USE
2504	      && GET_CODE (PATTERN (scan)) != CLOBBER
2505	      && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2506	    {
2507	      try &= ~regs_used (PATTERN (scan), 0);
2508	      break;
2509	    }
2510	}
2511      for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); )
2512	{
2513	  enum rtx_code code;
2514
2515	  if (INSN_DELETED_P (scan))
2516	    continue;
2517	  code = GET_CODE (scan);
2518	  if (GET_RTX_CLASS (code) == 'i')
2519	    {
2520	      used |= regs_used (PATTERN (scan), 0);
2521	      if (code == CALL_INSN)
2522		used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2523	      dead |= (used >> 16) & ~used;
2524	      if (dead & try)
2525		{
2526		  dead &= try;
2527		  break;
2528		}
2529	      if (code == JUMP_INSN)
2530		if (jump_left-- && simplejump_p (scan))
2531		  scan = JUMP_LABEL (scan);
2532		else
2533		  break;
2534	    }
2535	}
2536      /* Mask out the stack pointer again, in case it was
2537	 the only 'free' register we have found.  */
2538      dead &= 0x7fff;
2539    }
2540  /* If the immediate destination is still in range, check for possible
2541     threading with a jump beyond the delay slot insn.
2542     Don't check if we are called recursively; the jump has been or will be
2543     checked in a different invocation then.  */
2544
2545  else if (optimize && need_block >= 0)
2546    {
2547      rtx next = next_active_insn (next_active_insn (dest));
2548      if (next && GET_CODE (next) == JUMP_INSN
2549	  && GET_CODE (PATTERN (next)) == SET
2550	  && recog_memoized (next) == CODE_FOR_jump)
2551	{
2552	  dest = JUMP_LABEL (next);
2553	  if (dest
2554	      && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
2555	    gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
2556	}
2557    }
2558
2559  if (dead)
2560    {
2561      rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
2562
2563      /* It would be nice if we could convert the jump into an indirect
2564	 jump / far branch right now, and thus exposing all constituent
2565	 instructions to further optimization.  However, reorg uses
2566	 simplejump_p to determine if there is an unconditional jump where
2567	 it should try to schedule instructions from the target of the
2568	 branch; simplejump_p fails for indirect jumps even if they have
2569	 a JUMP_LABEL.  */
2570      rtx insn = emit_insn_before (gen_indirect_jump_scratch
2571				   (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2572				   , jump);
2573      INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2574      return insn;
2575    }
2576  else if (need_block)
2577    /* We can't use JUMP_LABEL here because it might be undefined
2578       when not optimizing.  */
2579    return emit_insn_before (gen_block_branch_redirect
2580		      (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
2581		      , jump);
2582  return prev;
2583}
2584
2585#define CONDJUMP_MIN -252
2586#define CONDJUMP_MAX 262
2587struct far_branch
2588{
2589  /* A label (to be placed) in front of the jump
2590     that jumps to our ultimate destination.  */
2591  rtx near_label;
2592  /* Where we are going to insert it if we cannot move the jump any farther,
2593     or the jump itself if we have picked up an existing jump.  */
2594  rtx insert_place;
2595  /* The ultimate destination.  */
2596  rtx far_label;
2597  struct far_branch *prev;
2598  /* If the branch has already been created, its address;
2599     else the address of its first prospective user.  */
2600  int address;
2601};
2602
2603enum mdep_reorg_phase_e mdep_reorg_phase;
2604void
2605gen_far_branch (bp)
2606     struct far_branch *bp;
2607{
2608  rtx insn = bp->insert_place;
2609  rtx jump;
2610  rtx label = gen_label_rtx ();
2611
2612  emit_label_after (label, insn);
2613  if (bp->far_label)
2614    {
2615      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2616      LABEL_NUSES (bp->far_label)++;
2617    }
2618  else
2619    jump = emit_jump_insn_after (gen_return (), insn);
2620  /* Emit a barrier so that reorg knows that any following instructions
2621     are not reachable via a fall-through path.
2622     But don't do this when not optimizing, since we wouldn't supress the
2623     alignment for the barrier then, and could end up with out-of-range
2624     pc-relative loads.  */
2625  if (optimize)
2626    emit_barrier_after (jump);
2627  emit_label_after (bp->near_label, insn);
2628  JUMP_LABEL (jump) = bp->far_label;
2629  if (! invert_jump (insn, label))
2630    abort ();
2631  /* Prevent reorg from undoing our splits.  */
2632  gen_block_redirect (jump, bp->address += 2, 2);
2633}
2634
2635/* Fix up ADDR_DIFF_VECs.  */
2636void
2637fixup_addr_diff_vecs (first)
2638     rtx first;
2639{
2640  rtx insn;
2641
2642  for (insn = first; insn; insn = NEXT_INSN (insn))
2643    {
2644      rtx vec_lab, pat, prev, prevpat, x, braf_label;
2645
2646      if (GET_CODE (insn) != JUMP_INSN
2647	  || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2648	continue;
2649      pat = PATTERN (insn);
2650      vec_lab = XEXP (XEXP (pat, 0), 0);
2651
2652      /* Search the matching casesi_jump_2.  */
2653      for (prev = vec_lab; ; prev = PREV_INSN (prev))
2654	{
2655	  if (GET_CODE (prev) != JUMP_INSN)
2656	    continue;
2657	  prevpat = PATTERN (prev);
2658	  if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2659	    continue;
2660	  x = XVECEXP (prevpat, 0, 1);
2661	  if (GET_CODE (x) != USE)
2662	    continue;
2663	  x = XEXP (x, 0);
2664	  if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2665	    break;
2666	}
2667
2668      /* Emit the reference label of the braf where it belongs, right after
2669	 the casesi_jump_2 (i.e. braf).  */
2670      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2671      emit_label_after (braf_label, prev);
2672
2673      /* Fix up the ADDR_DIF_VEC to be relative
2674	 to the reference address of the braf.  */
2675      XEXP (XEXP (pat, 0), 0) = braf_label;
2676    }
2677}
2678
2679/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2680   a barrier.  Return the base 2 logarithm of the desired alignment.  */
2681int
2682barrier_align (barrier_or_label)
2683     rtx barrier_or_label;
2684{
2685  rtx next = next_real_insn (barrier_or_label), pat, prev;
2686  int slot, credit;
2687
2688  if (! next)
2689    return 0;
2690
2691  pat = PATTERN (next);
2692
2693  if (GET_CODE (pat) == ADDR_DIFF_VEC)
2694    return 2;
2695
2696  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
2697    /* This is a barrier in front of a constant table.  */
2698    return 0;
2699
2700  prev = prev_real_insn (barrier_or_label);
2701  if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2702    {
2703      pat = PATTERN (prev);
2704      /* If this is a very small table, we want to keep the alignment after
2705	 the table to the minimum for proper code alignment.  */
2706      return ((TARGET_SMALLCODE
2707	       || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2708		   <= 1 << (CACHE_LOG - 2)))
2709	      ? 1 : CACHE_LOG);
2710    }
2711
2712  if (TARGET_SMALLCODE)
2713    return 0;
2714
2715  if (! TARGET_SH3 || ! optimize)
2716    return CACHE_LOG;
2717
2718  /* When fixing up pcloads, a constant table might be inserted just before
2719     the basic block that ends with the barrier.  Thus, we can't trust the
2720     instruction lengths before that.  */
2721  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2722    {
2723      /* Check if there is an immediately preceding branch to the insn beyond
2724	 the barrier.  We must weight the cost of discarding useful information
2725	 from the current cache line when executing this branch and there is
2726	 an alignment, against that of fetching unneeded insn in front of the
2727	 branch target when there is no alignment.  */
2728
2729      /* PREV is presumed to be the JUMP_INSN for the barrier under
2730	 investigation.  Skip to the insn before it.  */
2731      prev = prev_real_insn (prev);
2732
2733      for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
2734	   credit >= 0 && prev && GET_CODE (prev) == INSN;
2735	   prev = prev_real_insn (prev))
2736	{
2737	  if (GET_CODE (PATTERN (prev)) == USE
2738	      || GET_CODE (PATTERN (prev)) == CLOBBER)
2739	    continue;
2740	  if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2741	    prev = XVECEXP (PATTERN (prev), 0, 1);
2742	  if (slot &&
2743	      get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2744	    slot = 0;
2745	  credit -= get_attr_length (prev);
2746	}
2747      if (prev
2748	  && GET_CODE (prev) == JUMP_INSN
2749	  && JUMP_LABEL (prev)
2750	  && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
2751	  && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
2752	return 0;
2753    }
2754
2755  return CACHE_LOG;
2756}
2757
2758/* Exported to toplev.c.
2759
2760   Do a final pass over the function, just before delayed branch
2761   scheduling.  */
2762
2763void
2764machine_dependent_reorg (first)
2765     rtx first;
2766{
2767  rtx insn, mova;
2768  int num_mova;
2769  rtx r0_rtx = gen_rtx (REG, Pmode, 0);
2770  rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
2771
2772  /* If relaxing, generate pseudo-ops to associate function calls with
2773     the symbols they call.  It does no harm to not generate these
2774     pseudo-ops.  However, when we can generate them, it enables to
2775     linker to potentially relax the jsr to a bsr, and eliminate the
2776     register load and, possibly, the constant pool entry.  */
2777
2778  mdep_reorg_phase = SH_INSERT_USES_LABELS;
2779  if (TARGET_RELAX)
2780    {
2781      /* Remove all REG_LABEL notes.  We want to use them for our own
2782	 purposes.  This works because none of the remaining passes
2783	 need to look at them.
2784
2785	 ??? But it may break in the future.  We should use a machine
2786	 dependent REG_NOTE, or some other approach entirely.  */
2787      for (insn = first; insn; insn = NEXT_INSN (insn))
2788	{
2789	  if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2790	    {
2791	      rtx note;
2792
2793	      while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
2794		remove_note (insn, note);
2795	    }
2796	}
2797
2798      for (insn = first; insn; insn = NEXT_INSN (insn))
2799	{
2800	  rtx pattern, reg, link, set, scan, dies, label;
2801	  int rescan = 0, foundinsn = 0;
2802
2803	  if (GET_CODE (insn) == CALL_INSN)
2804	    {
2805	      pattern = PATTERN (insn);
2806
2807	      if (GET_CODE (pattern) == PARALLEL)
2808		pattern = XVECEXP (pattern, 0, 0);
2809	      if (GET_CODE (pattern) == SET)
2810		pattern = SET_SRC (pattern);
2811
2812	      if (GET_CODE (pattern) != CALL
2813		  || GET_CODE (XEXP (pattern, 0)) != MEM)
2814		continue;
2815
2816	      reg = XEXP (XEXP (pattern, 0), 0);
2817	    }
2818	  else
2819	    {
2820	      reg = sfunc_uses_reg (insn);
2821	      if (! reg)
2822		continue;
2823	    }
2824
2825	  if (GET_CODE (reg) != REG)
2826	    continue;
2827
2828	  /* This is a function call via REG.  If the only uses of REG
2829	     between the time that it is set and the time that it dies
2830	     are in function calls, then we can associate all the
2831	     function calls with the setting of REG.  */
2832
2833	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
2834	    {
2835	      if (REG_NOTE_KIND (link) != 0)
2836		continue;
2837	      set = single_set (XEXP (link, 0));
2838	      if (set && rtx_equal_p (reg, SET_DEST (set)))
2839		{
2840		  link = XEXP (link, 0);
2841		  break;
2842		}
2843	    }
2844
2845	  if (! link)
2846	    {
2847	      /* ??? Sometimes global register allocation will have
2848                 deleted the insn pointed to by LOG_LINKS.  Try
2849                 scanning backward to find where the register is set.  */
2850	      for (scan = PREV_INSN (insn);
2851		   scan && GET_CODE (scan) != CODE_LABEL;
2852		   scan = PREV_INSN (scan))
2853		{
2854		  if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2855		    continue;
2856
2857		  if (! reg_mentioned_p (reg, scan))
2858		    continue;
2859
2860		  if (noncall_uses_reg (reg, scan, &set))
2861		    break;
2862
2863		  if (set)
2864		    {
2865		      link = scan;
2866		      break;
2867		    }
2868		}
2869	    }
2870
2871	  if (! link)
2872	    continue;
2873
2874	  /* The register is set at LINK.  */
2875
2876	  /* We can only optimize the function call if the register is
2877             being set to a symbol.  In theory, we could sometimes
2878             optimize calls to a constant location, but the assembler
2879             and linker do not support that at present.  */
2880	  if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
2881	      && GET_CODE (SET_SRC (set)) != LABEL_REF)
2882	    continue;
2883
2884	  /* Scan forward from LINK to the place where REG dies, and
2885             make sure that the only insns which use REG are
2886             themselves function calls.  */
2887
2888	  /* ??? This doesn't work for call targets that were allocated
2889	     by reload, since there may not be a REG_DEAD note for the
2890	     register.  */
2891
2892	  dies = NULL_RTX;
2893	  for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
2894	    {
2895	      rtx scanset;
2896
2897	      /* Don't try to trace forward past a CODE_LABEL if we haven't
2898		 seen INSN yet.  Ordinarily, we will only find the setting insn
2899		 in LOG_LINKS if it is in the same basic block.  However,
2900		 cross-jumping can insert code labels in between the load and
2901		 the call, and can result in situations where a single call
2902		 insn may have two targets depending on where we came from.  */
2903
2904	      if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
2905		break;
2906
2907	      if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2908		continue;
2909
2910	      /* Don't try to trace forward past a JUMP.  To optimize
2911                 safely, we would have to check that all the
2912                 instructions at the jump destination did not use REG.  */
2913
2914	      if (GET_CODE (scan) == JUMP_INSN)
2915		break;
2916
2917	      if (! reg_mentioned_p (reg, scan))
2918		continue;
2919
2920	      if (noncall_uses_reg (reg, scan, &scanset))
2921		break;
2922
2923	      if (scan == insn)
2924		foundinsn = 1;
2925
2926	      if (scan != insn
2927		  && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
2928		{
2929		  /* There is a function call to this register other
2930                     than the one we are checking.  If we optimize
2931                     this call, we need to rescan again below.  */
2932		  rescan = 1;
2933		}
2934
2935	      /* ??? We shouldn't have to worry about SCANSET here.
2936		 We should just be able to check for a REG_DEAD note
2937		 on a function call.  However, the REG_DEAD notes are
2938		 apparently not dependable around libcalls; c-torture
2939		 execute/920501-2 is a test case.  If SCANSET is set,
2940		 then this insn sets the register, so it must have
2941		 died earlier.  Unfortunately, this will only handle
2942		 the cases in which the register is, in fact, set in a
2943		 later insn.  */
2944
2945	      /* ??? We shouldn't have to use FOUNDINSN here.
2946		 However, the LOG_LINKS fields are apparently not
2947		 entirely reliable around libcalls;
2948		 newlib/libm/math/e_pow.c is a test case.  Sometimes
2949		 an insn will appear in LOG_LINKS even though it is
2950		 not the most recent insn which sets the register. */
2951
2952	      if (foundinsn
2953		  && (scanset
2954		      || find_reg_note (scan, REG_DEAD, reg)))
2955		{
2956		  dies = scan;
2957		  break;
2958		}
2959	    }
2960
2961	  if (! dies)
2962	    {
2963	      /* Either there was a branch, or some insn used REG
2964                 other than as a function call address.  */
2965	      continue;
2966	    }
2967
2968	  /* Create a code label, and put it in a REG_LABEL note on
2969             the insn which sets the register, and on each call insn
2970             which uses the register.  In final_prescan_insn we look
2971             for the REG_LABEL notes, and output the appropriate label
2972             or pseudo-op.  */
2973
2974	  label = gen_label_rtx ();
2975	  REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2976				      REG_NOTES (link));
2977	  REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2978				      REG_NOTES (insn));
2979	  if (rescan)
2980	    {
2981	      scan = link;
2982	      do
2983		{
2984		  rtx reg2;
2985
2986		  scan = NEXT_INSN (scan);
2987		  if (scan != insn
2988		      && ((GET_CODE (scan) == CALL_INSN
2989			   && reg_mentioned_p (reg, scan))
2990			  || ((reg2 = sfunc_uses_reg (scan))
2991			      && REGNO (reg2) == REGNO (reg))))
2992		    REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL,
2993						label, REG_NOTES (scan));
2994		}
2995	      while (scan != dies);
2996	    }
2997	}
2998    }
2999
3000  if (TARGET_SH2)
3001    fixup_addr_diff_vecs (first);
3002
3003  if (optimize)
3004    {
3005      mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3006      shorten_branches (first);
3007    }
3008  /* Scan the function looking for move instructions which have to be
3009     changed to pc-relative loads and insert the literal tables.  */
3010
3011  mdep_reorg_phase = SH_FIXUP_PCLOAD;
3012  for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3013    {
3014      if (mova_p (insn))
3015	{
3016	  if (! num_mova++)
3017	    mova = insn;
3018	}
3019      else if (GET_CODE (insn) == JUMP_INSN
3020	       && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3021	       && num_mova)
3022	{
3023	  rtx scan;
3024	  int total;
3025
3026	  num_mova--;
3027
3028	  /* Some code might have been inserted between the mova and
3029	     its ADDR_DIFF_VEC.  Check if the mova is still in range.  */
3030	  for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3031	    total += get_attr_length (scan);
3032
3033	  /* range of mova is 1020, add 4 because pc counts from address of
3034	     second instruction after this one, subtract 2 in case pc is 2
3035	     byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
3036	     cancels out with alignment effects of the mova itself.  */
3037	  if (total > 1022)
3038	    {
3039	      /* Change the mova into a load, and restart scanning
3040		 there.  broken_move will then return true for mova.  */
3041	      SET_SRC (PATTERN (mova))
3042		= XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3043	      INSN_CODE (mova) = -1;
3044	      insn = mova;
3045	    }
3046	}
3047      if (broken_move (insn))
3048	{
3049	  rtx scan;
3050	  /* Scan ahead looking for a barrier to stick the constant table
3051	     behind.  */
3052	  rtx barrier = find_barrier (num_mova, mova, insn);
3053	  rtx last_float_move, last_float = 0, *last_float_addr;
3054
3055	  if (num_mova && ! mova_p (mova))
3056	    {
3057	      /* find_barrier had to change the first mova into a
3058		 pcload; thus, we have to start with this new pcload.  */
3059	      insn = mova;
3060	      num_mova = 0;
3061	    }
3062	  /* Now find all the moves between the points and modify them.  */
3063	  for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3064	    {
3065	      if (GET_CODE (scan) == CODE_LABEL)
3066		last_float = 0;
3067	      if (broken_move (scan))
3068		{
3069		  rtx *patp = &PATTERN (scan), pat = *patp;
3070		  rtx src, dst;
3071		  rtx lab;
3072		  rtx newinsn;
3073		  rtx newsrc;
3074		  enum machine_mode mode;
3075
3076		  if (GET_CODE (pat) == PARALLEL)
3077		    patp = &XVECEXP (pat, 0, 0), pat = *patp;
3078		  src = SET_SRC (pat);
3079		  dst = SET_DEST (pat);
3080		  mode = GET_MODE (dst);
3081
3082		  if (mode == SImode && hi_const (src)
3083		      && REGNO (dst) != FPUL_REG)
3084		    {
3085		      int offset = 0;
3086
3087		      mode = HImode;
3088		      while (GET_CODE (dst) == SUBREG)
3089			{
3090			  offset += SUBREG_WORD (dst);
3091			  dst = SUBREG_REG (dst);
3092			}
3093		      dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
3094		    }
3095
3096		  if (GET_CODE (dst) == REG
3097		      && ((REGNO (dst) >= FIRST_FP_REG
3098			   && REGNO (dst) <= LAST_XD_REG)
3099			  || REGNO (dst) == FPUL_REG))
3100		    {
3101		      if (last_float
3102			  && reg_set_between_p (r0_rtx, last_float_move, scan))
3103			last_float = 0;
3104		      lab = add_constant (src, mode, last_float);
3105		      if (lab)
3106			emit_insn_before (gen_mova (lab), scan);
3107		      else
3108			*last_float_addr = r0_inc_rtx;
3109		      last_float_move = scan;
3110		      last_float = src;
3111		      newsrc = gen_rtx (MEM, mode,
3112					((TARGET_SH4 && ! TARGET_FMOVD
3113					  || REGNO (dst) == FPUL_REG)
3114					 ? r0_inc_rtx
3115					 : r0_rtx));
3116		      last_float_addr = &XEXP (newsrc, 0);
3117		    }
3118		  else
3119		    {
3120		      lab = add_constant (src, mode, 0);
3121		      newsrc = gen_rtx (MEM, mode,
3122					gen_rtx (LABEL_REF, VOIDmode, lab));
3123		    }
3124		  RTX_UNCHANGING_P (newsrc) = 1;
3125		  *patp = gen_rtx (SET, VOIDmode, dst, newsrc);
3126		  INSN_CODE (scan) = -1;
3127		}
3128	    }
3129	  dump_table (barrier);
3130	  insn = barrier;
3131	}
3132    }
3133
3134  mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3135  insn_addresses = 0;
3136  split_branches (first);
3137
3138  /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3139     also has an effect on the register that holds the addres of the sfunc.
3140     Insert an extra dummy insn in front of each sfunc that pretends to
3141     use this register.  */
3142  if (flag_delayed_branch)
3143    {
3144      for (insn = first; insn; insn = NEXT_INSN (insn))
3145	{
3146	  rtx reg = sfunc_uses_reg (insn);
3147
3148	  if (! reg)
3149	    continue;
3150	  emit_insn_before (gen_use_sfunc_addr (reg), insn);
3151	}
3152    }
3153#if 0
3154  /* fpscr is not actually a user variable, but we pretend it is for the
3155     sake of the previous optimization passes, since we want it handled like
3156     one.  However, we don't have eny debugging information for it, so turn
3157     it into a non-user variable now.  */
3158  if (TARGET_SH4)
3159    REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3160#endif
3161  if (optimize)
3162    sh_flag_remove_dead_before_cse = 1;
3163  mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3164}
3165
3166int
3167get_dest_uid (label, max_uid)
3168     rtx label;
3169     int max_uid;
3170{
3171  rtx dest = next_real_insn (label);
3172  int dest_uid;
3173  if (! dest)
3174    /* This can happen for an undefined label.  */
3175    return 0;
3176  dest_uid = INSN_UID (dest);
3177  /* If this is a newly created branch redirection blocking instruction,
3178     we cannot index the branch_uid or insn_addresses arrays with its
3179     uid.  But then, we won't need to, because the actual destination is
3180     the following branch.  */
3181  while (dest_uid >= max_uid)
3182    {
3183      dest = NEXT_INSN (dest);
3184      dest_uid = INSN_UID (dest);
3185    }
3186  if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3187    return 0;
3188  return dest_uid;
3189}
3190
3191/* Split condbranches that are out of range.  Also add clobbers for
3192   scratch registers that are needed in far jumps.
3193   We do this before delay slot scheduling, so that it can take our
3194   newly created instructions into account.  It also allows us to
3195   find branches with common targets more easily.  */
3196
3197static void
3198split_branches (first)
3199     rtx first;
3200{
3201  rtx insn;
3202  struct far_branch **uid_branch, *far_branch_list = 0;
3203  int max_uid = get_max_uid ();
3204
3205  /* Find out which branches are out of range.  */
3206  shorten_branches (first);
3207
3208  uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3209  bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
3210
3211  for (insn = first; insn; insn = NEXT_INSN (insn))
3212    if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3213      continue;
3214    else if (INSN_DELETED_P (insn))
3215      {
3216	/* Shorten_branches would split this instruction again,
3217	   so transform it into a note.  */
3218	PUT_CODE (insn, NOTE);
3219	NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3220	NOTE_SOURCE_FILE (insn) = 0;
3221      }
3222    else if (GET_CODE (insn) == JUMP_INSN
3223	     /* Don't mess with ADDR_DIFF_VEC */
3224	     && (GET_CODE (PATTERN (insn)) == SET
3225		 || GET_CODE (PATTERN (insn)) == RETURN))
3226      {
3227	enum attr_type type = get_attr_type (insn);
3228	if (type == TYPE_CBRANCH)
3229	  {
3230	    rtx next, beyond;
3231
3232	    if (get_attr_length (insn) > 4)
3233	      {
3234		rtx src = SET_SRC (PATTERN (insn));
3235		rtx cond = XEXP (src, 0);
3236		rtx olabel = XEXP (XEXP (src, 1), 0);
3237		rtx jump;
3238		int addr = insn_addresses[INSN_UID (insn)];
3239		rtx label = 0;
3240		int dest_uid = get_dest_uid (olabel, max_uid);
3241		struct far_branch *bp = uid_branch[dest_uid];
3242
3243		/* redirect_jump needs a valid JUMP_LABEL, and it might delete
3244		   the label if the LABEL_NUSES count drops to zero.  There is
3245		   always a jump_optimize pass that sets these values, but it
3246		   proceeds to delete unreferenced code, and then if not
3247		   optimizing, to un-delete the deleted instructions, thus
3248		   leaving labels with too low uses counts.  */
3249		if (! optimize)
3250		  {
3251		    JUMP_LABEL (insn) = olabel;
3252		    LABEL_NUSES (olabel)++;
3253		  }
3254		if (! bp)
3255		  {
3256		    bp = (struct far_branch *) alloca (sizeof *bp);
3257		    uid_branch[dest_uid] = bp;
3258		    bp->prev = far_branch_list;
3259		    far_branch_list = bp;
3260		    bp->far_label
3261		      = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3262		    LABEL_NUSES (bp->far_label)++;
3263		  }
3264		else
3265		  {
3266		    label = bp->near_label;
3267		    if (! label && bp->address - addr >= CONDJUMP_MIN)
3268		      {
3269			rtx block = bp->insert_place;
3270
3271			if (GET_CODE (PATTERN (block)) == RETURN)
3272			  block = PREV_INSN (block);
3273			else
3274			  block = gen_block_redirect (block,
3275						      bp->address, 2);
3276			label = emit_label_after (gen_label_rtx (),
3277						  PREV_INSN (block));
3278			bp->near_label = label;
3279		      }
3280		    else if (label && ! NEXT_INSN (label))
3281		      if (addr + 2 - bp->address <= CONDJUMP_MAX)
3282			bp->insert_place = insn;
3283		      else
3284			gen_far_branch (bp);
3285		  }
3286		if (! label
3287		    || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
3288		  {
3289		    bp->near_label = label = gen_label_rtx ();
3290		    bp->insert_place = insn;
3291		    bp->address = addr;
3292		  }
3293		if (! redirect_jump (insn, label))
3294		  abort ();
3295	      }
3296	    else
3297	      {
3298		/* get_attr_length (insn) == 2 */
3299		/* Check if we have a pattern where reorg wants to redirect
3300		   the branch to a label from an unconditional branch that
3301		   is too far away.  */
3302		/* We can't use JUMP_LABEL here because it might be undefined
3303		   when not optimizing.  */
3304		/* A syntax error might cause beyond to be NULL_RTX.  */
3305		beyond
3306		  = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3307					    0));
3308
3309		if (beyond
3310		    && (GET_CODE (beyond) == JUMP_INSN
3311			|| (GET_CODE (beyond = next_active_insn (beyond))
3312			    == JUMP_INSN))
3313		    && GET_CODE (PATTERN (beyond)) == SET
3314		    && recog_memoized (beyond) == CODE_FOR_jump
3315		    && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
3316			 - insn_addresses[INSN_UID (insn)] + 252U)
3317			> 252 + 258 + 2))
3318		  gen_block_redirect (beyond,
3319				      insn_addresses[INSN_UID (beyond)], 1);
3320	      }
3321
3322	    next = next_active_insn (insn);
3323
3324	    if ((GET_CODE (next) == JUMP_INSN
3325		 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3326		&& GET_CODE (PATTERN (next)) == SET
3327		&& recog_memoized (next) == CODE_FOR_jump
3328		&& ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
3329		     - insn_addresses[INSN_UID (insn)] + 252U)
3330		    > 252 + 258 + 2))
3331	      gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
3332	  }
3333	else if (type == TYPE_JUMP || type == TYPE_RETURN)
3334	  {
3335	    int addr = insn_addresses[INSN_UID (insn)];
3336	    rtx far_label = 0;
3337	    int dest_uid = 0;
3338	    struct far_branch *bp;
3339
3340	    if (type == TYPE_JUMP)
3341	      {
3342		far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3343		dest_uid = get_dest_uid (far_label, max_uid);
3344		if (! dest_uid)
3345		  {
3346		    /* Parse errors can lead to labels outside
3347		      the insn stream.  */
3348		    if (! NEXT_INSN (far_label))
3349		      continue;
3350
3351		    if (! optimize)
3352		      {
3353			JUMP_LABEL (insn) = far_label;
3354			LABEL_NUSES (far_label)++;
3355		      }
3356		    redirect_jump (insn, NULL_RTX);
3357		    far_label = 0;
3358		  }
3359	      }
3360	    bp = uid_branch[dest_uid];
3361	    if (! bp)
3362	      {
3363		bp = (struct far_branch *) alloca (sizeof *bp);
3364		uid_branch[dest_uid] = bp;
3365		bp->prev = far_branch_list;
3366		far_branch_list = bp;
3367		bp->near_label = 0;
3368		bp->far_label = far_label;
3369		if (far_label)
3370		  LABEL_NUSES (far_label)++;
3371	      }
3372	    else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3373	      if (addr - bp->address <= CONDJUMP_MAX)
3374		emit_label_after (bp->near_label, PREV_INSN (insn));
3375	      else
3376		{
3377		  gen_far_branch (bp);
3378		  bp->near_label = 0;
3379		}
3380	    else
3381	      bp->near_label = 0;
3382	    bp->address = addr;
3383	    bp->insert_place = insn;
3384	    if (! far_label)
3385	      emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3386	    else
3387	      gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3388	  }
3389      }
3390  /* Generate all pending far branches,
3391     and free our references to the far labels.  */
3392  while (far_branch_list)
3393    {
3394      if (far_branch_list->near_label
3395	  && ! NEXT_INSN (far_branch_list->near_label))
3396	gen_far_branch (far_branch_list);
3397      if (optimize
3398	  && far_branch_list->far_label
3399	  && ! --LABEL_NUSES (far_branch_list->far_label))
3400	delete_insn (far_branch_list->far_label);
3401      far_branch_list = far_branch_list->prev;
3402    }
3403
3404  /* Instruction length information is no longer valid due to the new
3405     instructions that have been generated.  */
3406  init_insn_lengths ();
3407}
3408
3409/* Dump out instruction addresses, which is useful for debugging the
3410   constant pool table stuff.
3411
3412   If relaxing, output the label and pseudo-ops used to link together
3413   calls and the instruction which set the registers.  */
3414
3415/* ??? This is unnecessary, and probably should be deleted.  This makes
3416   the insn_addresses declaration above unnecessary.  */
3417
3418/* ??? The addresses printed by this routine for insns are nonsense for
3419   insns which are inside of a sequence where none of the inner insns have
3420   variable length.  This is because the second pass of shorten_branches
3421   does not bother to update them.  */
3422
3423void
3424final_prescan_insn (insn, opvec, noperands)
3425     rtx insn;
3426     rtx *opvec;
3427     int noperands;
3428{
3429  if (TARGET_DUMPISIZE)
3430    fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);
3431
3432  if (TARGET_RELAX)
3433    {
3434      rtx note;
3435
3436      note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3437      if (note)
3438	{
3439	  rtx pattern;
3440
3441	  pattern = PATTERN (insn);
3442	  if (GET_CODE (pattern) == PARALLEL)
3443	    pattern = XVECEXP (pattern, 0, 0);
3444	  if (GET_CODE (pattern) == CALL
3445	      || (GET_CODE (pattern) == SET
3446		  && (GET_CODE (SET_SRC (pattern)) == CALL
3447		      || get_attr_type (insn) == TYPE_SFUNC)))
3448	    asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3449			 CODE_LABEL_NUMBER (XEXP (note, 0)));
3450	  else if (GET_CODE (pattern) == SET)
3451	    ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3452				       CODE_LABEL_NUMBER (XEXP (note, 0)));
3453	  else
3454	    abort ();
3455	}
3456    }
3457}
3458
3459/* Dump out any constants accumulated in the final pass.  These will
3460   only be labels.  */
3461
3462char *
3463output_jump_label_table ()
3464{
3465  int i;
3466
3467  if (pool_size)
3468    {
3469      fprintf (asm_out_file, "\t.align 2\n");
3470      for (i = 0; i < pool_size; i++)
3471	{
3472	  pool_node *p = &pool_vector[i];
3473
3474	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3475				     CODE_LABEL_NUMBER (p->label));
3476	  output_asm_insn (".long	%O0", &p->value);
3477	}
3478      pool_size = 0;
3479    }
3480
3481  return "";
3482}
3483
3484/* A full frame looks like:
3485
3486   arg-5
3487   arg-4
3488   [ if current_function_anonymous_args
3489   arg-3
3490   arg-2
3491   arg-1
3492   arg-0 ]
3493   saved-fp
3494   saved-r10
3495   saved-r11
3496   saved-r12
3497   saved-pr
3498   local-n
3499   ..
3500   local-1
3501   local-0        <- fp points here.  */
3502
3503/* Number of bytes pushed for anonymous args, used to pass information
3504   between expand_prologue and expand_epilogue.  */
3505
3506static int extra_push;
3507
3508/* Adjust the stack by SIZE bytes.  REG holds the rtl of the register
3509  to be adjusted, and TEMP, if nonnegative, holds the register number
3510  of a general register that we may clobber.  */
3511
3512static void
3513output_stack_adjust (size, reg, temp)
3514     int size;
3515     rtx reg;
3516     int temp;
3517{
3518  if (size)
3519    {
3520      if (CONST_OK_FOR_I (size))
3521	emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3522      /* Try to do it with two partial adjustments; however, we must make
3523	 sure that the stack is properly aligned at all times, in case
3524	 an interrupt occurs between the two partial adjustments. */
3525      else if (CONST_OK_FOR_I (size / 2 & -4)
3526	       && CONST_OK_FOR_I (size - (size / 2 & -4)))
3527	{
3528	  emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3529	  emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3530	}
3531      else
3532	{
3533	  rtx const_reg;
3534
3535	  /* If TEMP is invalid, we could temporarily save a general
3536	     register to MACL.  However, there is currently no need
3537	     to handle this case, so just abort when we see it.  */
3538	  if (temp < 0)
3539	    abort ();
3540	  const_reg = gen_rtx (REG, SImode, temp);
3541
3542	  /* If SIZE is negative, subtract the positive value.
3543	     This sometimes allows a constant pool entry to be shared
3544	     between prologue and epilogue code.  */
3545	  if (size < 0)
3546	    {
3547	      emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3548	      emit_insn (gen_subsi3 (reg, reg, const_reg));
3549	    }
3550	  else
3551	    {
3552	      emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3553	      emit_insn (gen_addsi3 (reg, reg, const_reg));
3554	    }
3555	}
3556    }
3557}
3558
3559/* Output RTL to push register RN onto the stack.  */
3560
3561static void
3562push (rn)
3563     int rn;
3564{
3565  rtx x;
3566  if (rn == FPUL_REG)
3567    x = gen_push_fpul ();
3568  else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3569	   && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
3570    {
3571      if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
3572	return;
3573      x = gen_push_4 (gen_rtx (REG, DFmode, rn));
3574    }
3575  else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3576    x = gen_push_e (gen_rtx (REG, SFmode, rn));
3577  else
3578    x = gen_push (gen_rtx (REG, SImode, rn));
3579
3580  x = emit_insn (x);
3581  REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3582			   gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3583}
3584
3585/* Output RTL to pop register RN from the stack.  */
3586
3587static void
3588pop (rn)
3589     int rn;
3590{
3591  rtx x;
3592  if (rn == FPUL_REG)
3593    x = gen_pop_fpul ();
3594  else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3595	   && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
3596    {
3597      if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
3598	return;
3599      x = gen_pop_4 (gen_rtx (REG, DFmode, rn));
3600    }
3601  else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3602    x = gen_pop_e (gen_rtx (REG, SFmode, rn));
3603  else
3604    x = gen_pop (gen_rtx (REG, SImode, rn));
3605
3606  x = emit_insn (x);
3607  REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3608			   gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3609}
3610
3611/* Generate code to push the regs specified in the mask.  */
3612
3613static void
3614push_regs (mask, mask2)
3615     int mask, mask2;
3616{
3617  int i;
3618
3619  /* Push PR last; this gives better latencies after the prologue, and
3620     candidates for the return delay slot when there are no general
3621     registers pushed.  */
3622  for (i = 0; i < 32; i++)
3623    if (mask & (1 << i) && i != PR_REG)
3624      push (i);
3625  for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3626    if (mask2 & (1 << (i - 32)))
3627      push (i);
3628  if (mask & (1 << PR_REG))
3629    push (PR_REG);
3630}
3631
3632/* Work out the registers which need to be saved, both as a mask and a
3633   count of saved words.
3634
3635   If doing a pragma interrupt function, then push all regs used by the
3636   function, and if we call another function (we can tell by looking at PR),
3637   make sure that all the regs it clobbers are safe too.  */
3638
3639static int
3640calc_live_regs (count_ptr, live_regs_mask2)
3641     int *count_ptr;
3642     int *live_regs_mask2;
3643{
3644  int reg;
3645  int live_regs_mask = 0;
3646  int count;
3647  int interrupt_handler;
3648
3649  if ((lookup_attribute
3650       ("interrupt_handler",
3651	DECL_MACHINE_ATTRIBUTES (current_function_decl)))
3652      != NULL_TREE)
3653    interrupt_handler = 1;
3654  else
3655    interrupt_handler = 0;
3656
3657  *live_regs_mask2 = 0;
3658  /* If we can save a lot of saves by switching to double mode, do that.  */
3659  if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3660    for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3661      if (regs_ever_live[reg] && regs_ever_live[reg+1]
3662	  && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3663	  && ++count > 2)
3664	{
3665	  target_flags &= ~FPU_SINGLE_BIT;
3666	  break;
3667	}
3668  for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3669    {
3670      if ((interrupt_handler && ! pragma_trapa)
3671	  ? (/* Need to save all the regs ever live.  */
3672	     (regs_ever_live[reg]
3673	      || (call_used_regs[reg]
3674		  && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3675		  && regs_ever_live[PR_REG]))
3676	     && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3677	     && reg != RETURN_ADDRESS_POINTER_REGNUM
3678	     && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
3679	  : (/* Only push those regs which are used and need to be saved.  */
3680	     regs_ever_live[reg] && ! call_used_regs[reg]))
3681	{
3682	  if (reg >= 32)
3683	    *live_regs_mask2 |= 1 << (reg - 32);
3684	  else
3685	    live_regs_mask |= 1 << reg;
3686	  count++;
3687	  if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG)
3688	    if (reg <= LAST_FP_REG)
3689	      {
3690		if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
3691		  {
3692		    if (reg >= 32)
3693		      *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
3694		    else
3695		      live_regs_mask |= 1 << (reg ^ 1);
3696		    count++;
3697		  }
3698	      }
3699	    else if (reg <= LAST_XD_REG)
3700	      {
3701		/* Must switch to double mode to access these registers.  */
3702		target_flags &= ~FPU_SINGLE_BIT;
3703		count++;
3704	      }
3705	}
3706    }
3707
3708  *count_ptr = count;
3709  return live_regs_mask;
3710}
3711
3712/* Code to generate prologue and epilogue sequences */
3713
3714void
3715sh_expand_prologue ()
3716{
3717  int live_regs_mask;
3718  int d, i;
3719  int live_regs_mask2;
3720  int save_flags = target_flags;
3721  int double_align = 0;
3722
3723  /* We have pretend args if we had an object sent partially in registers
3724     and partially on the stack, e.g. a large structure.  */
3725  output_stack_adjust (-current_function_pretend_args_size,
3726		       stack_pointer_rtx, 3);
3727
3728  extra_push = 0;
3729
3730  /* This is set by SETUP_VARARGS to indicate that this is a varargs
3731     routine.  Clear it here so that the next function isn't affected. */
3732  if (current_function_anonymous_args)
3733    {
3734      current_function_anonymous_args = 0;
3735
3736      /* This is not used by the SH3E calling convention  */
3737      if (! TARGET_SH3E && ! TARGET_HITACHI)
3738	{
3739	  /* Push arg regs as if they'd been provided by caller in stack.  */
3740	  for (i = 0; i < NPARM_REGS(SImode); i++)
3741	    {
3742	      int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
3743	      if (i >= (NPARM_REGS(SImode)
3744			- current_function_args_info.arg_count[(int) SH_ARG_INT]
3745			))
3746		break;
3747	      push (rn);
3748	      extra_push += 4;
3749	    }
3750	}
3751    }
3752
3753  /* If we're supposed to switch stacks at function entry, do so now.  */
3754  if (sp_switch)
3755    emit_insn (gen_sp_switch_1 ());
3756
3757  live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3758  /* ??? Maybe we could save some switching if we can move a mode switch
3759     that already happens to be at the function start into the prologue.  */
3760  if (target_flags != save_flags)
3761    emit_insn (gen_toggle_sz ());
3762  push_regs (live_regs_mask, live_regs_mask2);
3763  if (target_flags != save_flags)
3764    emit_insn (gen_toggle_sz ());
3765
3766  if (TARGET_ALIGN_DOUBLE && d & 1)
3767    double_align = 4;
3768
3769  target_flags = save_flags;
3770
3771  output_stack_adjust (-get_frame_size () - double_align,
3772		       stack_pointer_rtx, 3);
3773
3774  if (frame_pointer_needed)
3775    emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
3776}
3777
3778void
3779sh_expand_epilogue ()
3780{
3781  int live_regs_mask;
3782  int d, i;
3783
3784  int live_regs_mask2;
3785  int save_flags = target_flags;
3786  int frame_size = get_frame_size ();
3787
3788  live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3789
3790  if (TARGET_ALIGN_DOUBLE && d & 1)
3791    frame_size += 4;
3792
3793  if (frame_pointer_needed)
3794    {
3795      output_stack_adjust (frame_size, frame_pointer_rtx, 7);
3796
3797      /* We must avoid moving the stack pointer adjustment past code
3798	 which reads from the local frame, else an interrupt could
3799	 occur after the SP adjustment and clobber data in the local
3800	 frame.  */
3801      emit_insn (gen_blockage ());
3802      emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
3803    }
3804  else if (frame_size)
3805    {
3806      /* We must avoid moving the stack pointer adjustment past code
3807	 which reads from the local frame, else an interrupt could
3808	 occur after the SP adjustment and clobber data in the local
3809	 frame.  */
3810      emit_insn (gen_blockage ());
3811      output_stack_adjust (frame_size, stack_pointer_rtx, 7);
3812    }
3813
3814  /* Pop all the registers.  */
3815
3816  if (target_flags != save_flags)
3817    emit_insn (gen_toggle_sz ());
3818  if (live_regs_mask & (1 << PR_REG))
3819    pop (PR_REG);
3820  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3821    {
3822      int j = (FIRST_PSEUDO_REGISTER - 1) - i;
3823      if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
3824	pop (j);
3825      else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
3826	pop (j);
3827    }
3828  if (target_flags != save_flags)
3829    emit_insn (gen_toggle_sz ());
3830  target_flags = save_flags;
3831
3832  output_stack_adjust (extra_push + current_function_pretend_args_size,
3833		       stack_pointer_rtx, 7);
3834
3835  /* Switch back to the normal stack if necessary.  */
3836  if (sp_switch)
3837    emit_insn (gen_sp_switch_2 ());
3838}
3839
3840/* Clear variables at function end.  */
3841
3842void
3843function_epilogue (stream, size)
3844     FILE *stream;
3845     int size;
3846{
3847  trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
3848  sp_switch = NULL_RTX;
3849}
3850
3851rtx
3852sh_builtin_saveregs (arglist)
3853     tree arglist;
3854{
3855  tree fntype = TREE_TYPE (current_function_decl);
3856  /* First unnamed integer register.  */
3857  int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
3858  /* Number of integer registers we need to save.  */
3859  int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
3860  /* First unnamed SFmode float reg */
3861  int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
3862  /* Number of SFmode float regs to save.  */
3863  int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
3864  int ptrsize = GET_MODE_SIZE (Pmode);
3865  rtx valist, regbuf, fpregs;
3866  int bufsize, regno;
3867
3868  /* Allocate block of memory for the regs. */
3869  /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
3870     Or can assign_stack_local accept a 0 SIZE argument?  */
3871  bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
3872
3873  regbuf = assign_stack_local (BLKmode, bufsize, 0);
3874  MEM_SET_IN_STRUCT_P (regbuf, 1);
3875
3876  /* Save int args.
3877     This is optimized to only save the regs that are necessary.  Explicitly
3878     named args need not be saved.  */
3879  if (n_intregs > 0)
3880    move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
3881			 gen_rtx (MEM, BLKmode,
3882			 	plus_constant (XEXP (regbuf, 0),
3883					n_floatregs * UNITS_PER_WORD)),
3884			 n_intregs, n_intregs * UNITS_PER_WORD);
3885
3886  /* Save float args.
3887     This is optimized to only save the regs that are necessary.  Explicitly
3888     named args need not be saved.
3889     We explicitly build a pointer to the buffer because it halves the insn
3890     count when not optimizing (otherwise the pointer is built for each reg
3891     saved).
3892     We emit the moves in reverse order so that we can use predecrement.  */
3893
3894  fpregs = gen_reg_rtx (Pmode);
3895  emit_move_insn (fpregs, XEXP (regbuf, 0));
3896  emit_insn (gen_addsi3 (fpregs, fpregs,
3897			 GEN_INT (n_floatregs * UNITS_PER_WORD)));
3898  if (TARGET_SH4)
3899    {
3900      for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
3901	{
3902	  emit_insn (gen_addsi3 (fpregs, fpregs,
3903				 GEN_INT (-2 * UNITS_PER_WORD)));
3904	  emit_move_insn (gen_rtx (MEM, DFmode, fpregs),
3905			  gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
3906	}
3907      regno = first_floatreg;
3908      if (regno & 1)
3909	{
3910	  emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
3911	  emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
3912			  gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
3913						- (TARGET_LITTLE_ENDIAN != 0)));
3914	}
3915    }
3916  else
3917    for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
3918      {
3919	emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
3920	emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
3921			gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
3922      }
3923
3924  /* Return the address of the regbuf.  */
3925  return XEXP (regbuf, 0);
3926}
3927
3928/* Define the offset between two registers, one to be eliminated, and
3929   the other its replacement, at the start of a routine.  */
3930
3931int
3932initial_elimination_offset (from, to)
3933     int from;
3934     int to;
3935{
3936  int regs_saved;
3937  int total_saved_regs_space;
3938  int total_auto_space = get_frame_size ();
3939  int save_flags = target_flags;
3940
3941  int live_regs_mask, live_regs_mask2;
3942  live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
3943  if (TARGET_ALIGN_DOUBLE && regs_saved & 1)
3944    total_auto_space += 4;
3945  target_flags = save_flags;
3946
3947  total_saved_regs_space = (regs_saved) * 4;
3948
3949  if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
3950    return total_saved_regs_space + total_auto_space;
3951
3952  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3953    return total_saved_regs_space + total_auto_space;
3954
3955  /* Initial gap between fp and sp is 0.  */
3956  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3957    return 0;
3958
3959  if (from == RETURN_ADDRESS_POINTER_REGNUM
3960      && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
3961    {
3962      int i, n = total_saved_regs_space;
3963      for (i = PR_REG-1; i >= 0; i--)
3964	if (live_regs_mask & (1 << i))
3965	  n -= 4;
3966      return n + total_auto_space;
3967    }
3968
3969  abort ();
3970}
3971
3972/* Handle machine specific pragmas to be semi-compatible with Hitachi
3973   compiler.  */
3974
3975int
3976sh_handle_pragma (p_getc, p_ungetc, pname)
3977     int (*  p_getc)   PROTO((void));
3978     void (* p_ungetc) PROTO((int));
3979     char *  pname;
3980{
3981  int retval = 0;
3982
3983  if (strcmp (pname, "interrupt") == 0)
3984    pragma_interrupt = retval = 1;
3985  else if (strcmp (pname, "trapa") == 0)
3986    pragma_interrupt = pragma_trapa = retval = 1;
3987  else if (strcmp (pname, "nosave_low_regs") == 0)
3988    pragma_nosave_low_regs = retval = 1;
3989
3990  return retval;
3991}
3992
3993/* Generate 'handle_interrupt' attribute for decls */
3994
3995void
3996sh_pragma_insert_attributes (node, attributes, prefix)
3997     tree node;
3998     tree * attributes;
3999     tree * prefix;
4000{
4001  tree a;
4002
4003  if (! pragma_interrupt
4004      || TREE_CODE (node) != FUNCTION_DECL)
4005    return;
4006
4007  /* We are only interested in fields.  */
4008  if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4009    return;
4010
4011  /* Add a 'handle_interrupt' attribute.  */
4012  * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4013
4014  return;
4015}
4016
4017/* Return nonzero if ATTR is a valid attribute for DECL.
4018   ATTRIBUTES are any existing attributes and ARGS are the arguments
4019   supplied with ATTR.
4020
4021   Supported attributes:
4022
4023   interrupt_handler -- specifies this function is an interrupt handler.
4024
4025   sp_switch -- specifies an alternate stack for an interrupt handler
4026   to run on.
4027
4028   trap_exit -- use a trapa to exit an interrupt function instead of
4029   an rte instruction.  */
4030
4031int
4032sh_valid_machine_decl_attribute (decl, attributes, attr, args)
4033     tree decl;
4034     tree attributes;
4035     tree attr;
4036     tree args;
4037{
4038  int retval = 0;
4039
4040  if (TREE_CODE (decl) != FUNCTION_DECL)
4041    return 0;
4042
4043  if (is_attribute_p ("interrupt_handler", attr))
4044    {
4045      return 1;
4046    }
4047
4048  if (is_attribute_p ("sp_switch", attr))
4049    {
4050      /* The sp_switch attribute only has meaning for interrupt functions.  */
4051      if (!pragma_interrupt)
4052	return 0;
4053
4054      /* sp_switch must have an argument.  */
4055      if (!args || TREE_CODE (args) != TREE_LIST)
4056	return 0;
4057
4058      /* The argument must be a constant string.  */
4059      if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
4060	return 0;
4061
4062      sp_switch = gen_rtx (SYMBOL_REF, VOIDmode,
4063			   TREE_STRING_POINTER (TREE_VALUE (args)));
4064      return 1;
4065    }
4066
4067  if (is_attribute_p ("trap_exit", attr))
4068    {
4069      /* The trap_exit attribute only has meaning for interrupt functions.  */
4070      if (!pragma_interrupt)
4071	return 0;
4072
4073      /* trap_exit must have an argument.  */
4074      if (!args || TREE_CODE (args) != TREE_LIST)
4075	return 0;
4076
4077      /* The argument must be a constant integer.  */
4078      if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
4079	return 0;
4080
4081      trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
4082      return 1;
4083    }
4084}
4085
4086
4087/* Predicates used by the templates.  */
4088
4089/* Returns 1 if OP is MACL, MACH or PR.  The input must be a REG rtx.
4090   Used only in general_movsrc_operand.  */
4091
4092int
4093system_reg_operand (op, mode)
4094     rtx op;
4095     enum machine_mode mode;
4096{
4097  switch (REGNO (op))
4098    {
4099    case PR_REG:
4100    case MACL_REG:
4101    case MACH_REG:
4102      return 1;
4103    }
4104  return 0;
4105}
4106
4107/* Returns 1 if OP can be source of a simple move operation.
4108   Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
4109   invalid as are subregs of system registers.  */
4110
4111int
4112general_movsrc_operand (op, mode)
4113     rtx op;
4114     enum machine_mode mode;
4115{
4116  if (GET_CODE (op) == MEM)
4117    {
4118      rtx inside = XEXP (op, 0);
4119      if (GET_CODE (inside) == CONST)
4120	inside = XEXP (inside, 0);
4121
4122      if (GET_CODE (inside) == LABEL_REF)
4123	return 1;
4124
4125      if (GET_CODE (inside) == PLUS
4126	  && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4127	  && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4128	return 1;
4129
4130      /* Only post inc allowed.  */
4131      if (GET_CODE (inside) == PRE_DEC)
4132	return 0;
4133    }
4134
4135  if ((mode == QImode || mode == HImode)
4136      && (GET_CODE (op) == SUBREG
4137	  && GET_CODE (XEXP (op, 0)) == REG
4138	  && system_reg_operand (XEXP (op, 0), mode)))
4139    return 0;
4140
4141  return general_operand (op, mode);
4142}
4143
4144/* Returns 1 if OP can be a destination of a move.
4145   Same as general_operand, but no preinc allowed.  */
4146
4147int
4148general_movdst_operand (op, mode)
4149     rtx op;
4150     enum machine_mode mode;
4151{
4152  /* Only pre dec allowed.  */
4153  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4154    return 0;
4155
4156  return general_operand (op, mode);
4157}
4158
4159/* Returns 1 if OP is a normal arithmetic register.  */
4160
4161int
4162arith_reg_operand (op, mode)
4163     rtx op;
4164     enum machine_mode mode;
4165{
4166  if (register_operand (op, mode))
4167    {
4168      int regno;
4169
4170      if (GET_CODE (op) == REG)
4171	regno = REGNO (op);
4172      else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4173	regno = REGNO (SUBREG_REG (op));
4174      else
4175	return 1;
4176
4177      return (regno != T_REG && regno != PR_REG
4178	      && (regno != FPUL_REG || TARGET_SH4)
4179	      && regno != MACH_REG && regno != MACL_REG);
4180    }
4181  return 0;
4182}
4183
4184int
4185fp_arith_reg_operand (op, mode)
4186     rtx op;
4187     enum machine_mode mode;
4188{
4189  if (register_operand (op, mode))
4190    {
4191      int regno;
4192
4193      if (GET_CODE (op) == REG)
4194	regno = REGNO (op);
4195      else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4196	regno = REGNO (SUBREG_REG (op));
4197      else
4198	return 1;
4199
4200      return (regno >= FIRST_PSEUDO_REGISTER
4201	      || (regno >= FIRST_FP_REG && regno <= LAST_FP_REG));
4202    }
4203  return 0;
4204}
4205
4206int
4207fp_extended_operand (op, mode)
4208     rtx op;
4209     enum machine_mode mode;
4210{
4211  if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode)
4212    {
4213      op = XEXP (op, 0);
4214      mode = GET_MODE (op);
4215    }
4216  if (register_operand (op, mode))
4217    {
4218      int regno;
4219
4220      if (GET_CODE (op) == REG)
4221	regno = REGNO (op);
4222      else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4223	regno = REGNO (SUBREG_REG (op));
4224      else
4225	return 1;
4226
4227      return (regno != T_REG && regno != PR_REG && regno > 15
4228	      && regno != MACH_REG && regno != MACL_REG);
4229    }
4230  return 0;
4231}
4232
4233/* Returns 1 if OP is a valid source operand for an arithmetic insn.  */
4234
4235int
4236arith_operand (op, mode)
4237     rtx op;
4238     enum machine_mode mode;
4239{
4240  if (arith_reg_operand (op, mode))
4241    return 1;
4242
4243  if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
4244    return 1;
4245
4246  return 0;
4247}
4248
4249/* Returns 1 if OP is a valid source operand for a compare insn.  */
4250
4251int
4252arith_reg_or_0_operand (op, mode)
4253     rtx op;
4254     enum machine_mode mode;
4255{
4256  if (arith_reg_operand (op, mode))
4257    return 1;
4258
4259  if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
4260    return 1;
4261
4262  return 0;
4263}
4264
4265/* Returns 1 if OP is a valid source operand for a logical operation.  */
4266
4267int
4268logical_operand (op, mode)
4269     rtx op;
4270     enum machine_mode mode;
4271{
4272  if (arith_reg_operand (op, mode))
4273    return 1;
4274
4275  if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4276    return 1;
4277
4278  return 0;
4279}
4280
4281/* Nonzero if OP is a floating point value with value 0.0.  */
4282
4283int
4284fp_zero_operand (op)
4285     rtx op;
4286{
4287  REAL_VALUE_TYPE r;
4288
4289  if (GET_MODE (op) != SFmode)
4290    return 0;
4291
4292  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4293  return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
4294}
4295
4296/* Nonzero if OP is a floating point value with value 1.0.  */
4297
4298int
4299fp_one_operand (op)
4300     rtx op;
4301{
4302  REAL_VALUE_TYPE r;
4303
4304  if (GET_MODE (op) != SFmode)
4305    return 0;
4306
4307  REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4308  return REAL_VALUES_EQUAL (r, dconst1);
4309}
4310
4311int
4312tertiary_reload_operand (op, mode)
4313     rtx op;
4314     enum machine_mode mode;
4315{
4316  enum rtx_code code = GET_CODE (op);
4317  return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
4318}
4319
4320int
4321fpscr_operand (op)
4322     rtx op;
4323{
4324  return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
4325	  && GET_MODE (op) == PSImode);
4326}
4327
4328int
4329commutative_float_operator (op, mode)
4330     rtx op;
4331     enum machine_mode mode;
4332{
4333  if (GET_MODE (op) != mode)
4334    return 0;
4335  switch (GET_CODE (op))
4336    {
4337    case PLUS:
4338    case MULT:
4339      return 1;
4340    }
4341  return 0;
4342}
4343
4344int
4345noncommutative_float_operator (op, mode)
4346     rtx op;
4347     enum machine_mode mode;
4348{
4349  if (GET_MODE (op) != mode)
4350    return 0;
4351  switch (GET_CODE (op))
4352    {
4353    case MINUS:
4354    case DIV:
4355      return 1;
4356    }
4357  return 0;
4358}
4359
4360int
4361binary_float_operator (op, mode)
4362     rtx op;
4363     enum machine_mode mode;
4364{
4365  if (GET_MODE (op) != mode)
4366    return 0;
4367  switch (GET_CODE (op))
4368    {
4369    case PLUS:
4370    case MINUS:
4371    case MULT:
4372    case DIV:
4373      return 1;
4374    }
4375  return 0;
4376}
4377
4378/* Return the destination address of a branch.  */
4379
4380int
4381branch_dest (branch)
4382     rtx branch;
4383{
4384  rtx dest = SET_SRC (PATTERN (branch));
4385  int dest_uid;
4386
4387  if (GET_CODE (dest) == IF_THEN_ELSE)
4388    dest = XEXP (dest, 1);
4389  dest = XEXP (dest, 0);
4390  dest_uid = INSN_UID (dest);
4391  return insn_addresses[dest_uid];
4392}
4393
4394/* Return non-zero if REG is not used after INSN.
4395   We assume REG is a reload reg, and therefore does
4396   not live past labels.  It may live past calls or jumps though.  */
4397int
4398reg_unused_after (reg, insn)
4399     rtx reg;
4400     rtx insn;
4401{
4402  enum rtx_code code;
4403  rtx set;
4404
4405  /* If the reg is set by this instruction, then it is safe for our
4406     case.  Disregard the case where this is a store to memory, since
4407     we are checking a register used in the store address.  */
4408  set = single_set (insn);
4409  if (set && GET_CODE (SET_DEST (set)) != MEM
4410      && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4411    return 1;
4412
4413  while (insn = NEXT_INSN (insn))
4414    {
4415      code = GET_CODE (insn);
4416
4417#if 0
4418      /* If this is a label that existed before reload, then the register
4419	 if dead here.  However, if this is a label added by reorg, then
4420	 the register may still be live here.  We can't tell the difference,
4421	 so we just ignore labels completely.  */
4422      if (code == CODE_LABEL)
4423	return 1;
4424      /* else */
4425#endif
4426
4427      if (code == JUMP_INSN)
4428	return 0;
4429
4430      /* If this is a sequence, we must handle them all at once.
4431	 We could have for instance a call that sets the target register,
4432	 and a insn in a delay slot that uses the register.  In this case,
4433	 we must return 0.  */
4434      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4435	{
4436	  int i;
4437	  int retval = 0;
4438
4439	  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4440	    {
4441	      rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
4442	      rtx set = single_set (this_insn);
4443
4444	      if (GET_CODE (this_insn) == CALL_INSN)
4445		code = CALL_INSN;
4446	      else if (GET_CODE (this_insn) == JUMP_INSN)
4447		{
4448		  if (INSN_ANNULLED_BRANCH_P (this_insn))
4449		    return 0;
4450		  code = JUMP_INSN;
4451		}
4452
4453	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4454		return 0;
4455	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4456		{
4457		  if (GET_CODE (SET_DEST (set)) != MEM)
4458		    retval = 1;
4459		  else
4460		    return 0;
4461		}
4462	      if (set == 0
4463		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
4464		return 0;
4465	    }
4466	  if (retval == 1)
4467	    return 1;
4468	  else if (code == JUMP_INSN)
4469	    return 0;
4470	}
4471      else if (GET_RTX_CLASS (code) == 'i')
4472	{
4473	  rtx set = single_set (insn);
4474
4475	  if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4476	    return 0;
4477	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4478	    return GET_CODE (SET_DEST (set)) != MEM;
4479	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
4480	    return 0;
4481	}
4482
4483      if (code == CALL_INSN && call_used_regs[REGNO (reg)])
4484	return 1;
4485    }
4486  return 1;
4487}
4488
4489extern struct obstack permanent_obstack;
4490
4491rtx
4492get_fpscr_rtx ()
4493{
4494  static rtx fpscr_rtx;
4495
4496  if (! fpscr_rtx)
4497    {
4498      push_obstacks (&permanent_obstack, &permanent_obstack);
4499      fpscr_rtx = gen_rtx (REG, PSImode, 48);
4500      REG_USERVAR_P (fpscr_rtx) = 1;
4501      pop_obstacks ();
4502      mark_user_reg (fpscr_rtx);
4503    }
4504  if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
4505    mark_user_reg (fpscr_rtx);
4506  return fpscr_rtx;
4507}
4508
4509void
4510emit_sf_insn (pat)
4511     rtx pat;
4512{
4513  rtx addr;
4514  /* When generating reload insns,  we must not create new registers.  FPSCR
4515     should already have the correct value, so do nothing to change it.  */
4516  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
4517    {
4518      addr = gen_reg_rtx (SImode);
4519      emit_insn (gen_fpu_switch0 (addr));
4520    }
4521  emit_insn (pat);
4522  if (! TARGET_FPU_SINGLE && ! reload_in_progress)
4523    {
4524      addr = gen_reg_rtx (SImode);
4525      emit_insn (gen_fpu_switch1 (addr));
4526    }
4527}
4528
4529void
4530emit_df_insn (pat)
4531     rtx pat;
4532{
4533  rtx addr;
4534  if (TARGET_FPU_SINGLE && ! reload_in_progress)
4535    {
4536      addr = gen_reg_rtx (SImode);
4537      emit_insn (gen_fpu_switch0 (addr));
4538    }
4539  emit_insn (pat);
4540  if (TARGET_FPU_SINGLE && ! reload_in_progress)
4541    {
4542      addr = gen_reg_rtx (SImode);
4543      emit_insn (gen_fpu_switch1 (addr));
4544    }
4545}
4546
4547void
4548expand_sf_unop (fun, operands)
4549     rtx (*fun)();
4550     rtx *operands;
4551{
4552  emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
4553}
4554
4555void
4556expand_sf_binop (fun, operands)
4557     rtx (*fun)();
4558     rtx *operands;
4559{
4560  emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
4561			 get_fpscr_rtx ()));
4562}
4563
4564void
4565expand_df_unop (fun, operands)
4566     rtx (*fun)();
4567     rtx *operands;
4568{
4569  emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
4570}
4571
4572void
4573expand_df_binop (fun, operands)
4574     rtx (*fun)();
4575     rtx *operands;
4576{
4577  emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
4578			 get_fpscr_rtx ()));
4579}
4580
4581void
4582expand_fp_branch (compare, branch)
4583     rtx (*compare) (), (*branch) ();
4584{
4585  (GET_MODE (sh_compare_op0)  == SFmode ? emit_sf_insn : emit_df_insn)
4586    ((*compare) ());
4587  emit_jump_insn ((*branch) ());
4588}
4589
4590/* We don't want to make fpscr call-saved, because that would prevent
4591   channging it, and it would also cost an exstra instruction to save it.
4592   We don't want it to be known as a global register either, because
4593   that disables all flow analysis.  But it has to be live at the function
4594   return.  Thus, we need to insert a USE at the end of the function.  */
4595/* This should best be called at about the time FINALIZE_PIC is called,
4596   but not dependent on flag_pic.  Alas, there is no suitable hook there,
4597   so this gets called from HAVE_RETURN.  */
4598int
4599emit_fpscr_use ()
4600{
4601  static int fpscr_uses = 0;
4602
4603  if (rtx_equal_function_value_matters)
4604    {
4605      emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
4606      fpscr_uses++;
4607    }
4608  else
4609    {
4610      if (fpscr_uses > 1)
4611	{
4612	  /* Due to he crude way we emit the USEs, we might end up with
4613	     some extra ones.  Delete all but the last one.  */
4614	  rtx insn;
4615
4616	  for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
4617	    if (GET_CODE (insn) == INSN
4618		&& GET_CODE (PATTERN (insn)) == USE
4619		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
4620		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
4621	      {
4622		insn = PREV_INSN (insn);
4623		break;
4624	      }
4625	  for (; insn; insn = PREV_INSN (insn))
4626	    if (GET_CODE (insn) == INSN
4627		&& GET_CODE (PATTERN (insn)) == USE
4628		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
4629		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
4630	      {
4631		PUT_CODE (insn, NOTE);
4632		NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4633		NOTE_SOURCE_FILE (insn) = 0;
4634	      }
4635	}
4636      fpscr_uses = 0;
4637    }
4638}
4639
4640/* ??? gcc does flow analysis strictly after common subexpression
4641   elimination.  As a result, common subespression elimination fails
4642   when there are some intervening statements setting the same register.
4643   If we did nothing about this, this would hurt the precision switching
4644   for SH4 badly.  There is some cse after reload, but it is unable to
4645   undo the extra register pressure from the unused instructions, and
4646   it cannot remove auto-increment loads.
4647
4648   A C code example that shows this flow/cse weakness for (at least) SH
4649   and sparc (as of gcc ss-970706) is this:
4650
4651double
4652f(double a)
4653{
4654  double d;
4655  d = 0.1;
4656  a += d;
4657  d = 1.1;
4658  d = 0.1;
4659  a *= d;
4660  return a;
4661}
4662
4663   So we add another pass before common subexpression elimination, to
4664   remove assignments that are dead due to a following assignment in the
4665   same basic block.  */
4666
4667int sh_flag_remove_dead_before_cse;
4668
4669static void
4670mark_use (x, reg_set_block)
4671     rtx x, *reg_set_block;
4672{
4673  enum rtx_code code;
4674
4675  if (! x)
4676    return;
4677  code = GET_CODE (x);
4678  switch (code)
4679    {
4680    case REG:
4681      {
4682	int regno = REGNO (x);
4683	int nregs = (regno < FIRST_PSEUDO_REGISTER
4684		     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
4685		     : 1);
4686	do
4687	  {
4688	    reg_set_block[regno + nregs - 1] = 0;
4689	  }
4690	while (--nregs);
4691	break;
4692      }
4693    case SET:
4694      {
4695	rtx dest = SET_DEST (x);
4696
4697	if (GET_CODE (dest) == SUBREG)
4698	  dest = SUBREG_REG (dest);
4699	if (GET_CODE (dest) != REG)
4700	  mark_use (dest, reg_set_block);
4701	mark_use (SET_SRC (x), reg_set_block);
4702	break;
4703      }
4704    case CLOBBER:
4705      break;
4706    default:
4707      {
4708	char *fmt = GET_RTX_FORMAT (code);
4709	int i, j;
4710	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4711	  {
4712	    if (fmt[i] == 'e')
4713	      mark_use (XEXP (x, i), reg_set_block);
4714	    else if (fmt[i] == 'E')
4715	      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4716		mark_use (XVECEXP (x, i, j), reg_set_block);
4717	  }
4718	break;
4719      }
4720    }
4721}
4722
4723int
4724remove_dead_before_cse ()
4725{
4726  rtx *reg_set_block, last, last_call, insn, set;
4727  int in_libcall = 0;
4728
4729  /* This pass should run just once, after rtl generation.  */
4730
4731  if (! sh_flag_remove_dead_before_cse
4732      || rtx_equal_function_value_matters
4733      || reload_completed)
4734    return;
4735
4736  sh_flag_remove_dead_before_cse = 0;
4737
4738  reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx));
4739  bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx));
4740  last_call = last = get_last_insn ();
4741  for (insn = last; insn; insn = PREV_INSN (insn))
4742    {
4743      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4744	continue;
4745      if (GET_CODE (insn) == JUMP_INSN)
4746	{
4747	  last_call = last = insn;
4748	  continue;
4749	}
4750      set = single_set (insn);
4751
4752      /* Don't delete parts of libcalls, since that would confuse cse, loop
4753	 and flow.  */
4754      if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
4755	in_libcall = 1;
4756      else if (in_libcall)
4757	{
4758	  if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
4759	    in_libcall = 0;
4760	}
4761      else if (set && GET_CODE (SET_DEST (set)) == REG)
4762	{
4763	  int regno = REGNO (SET_DEST (set));
4764	  rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
4765			  ? last_call
4766			  : last);
4767	  if (reg_set_block[regno] == ref_insn
4768	      && (regno >= FIRST_PSEUDO_REGISTER
4769		  || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
4770	      && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
4771	    {
4772	      PUT_CODE (insn, NOTE);
4773	      NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4774	      NOTE_SOURCE_FILE (insn) = 0;
4775	      continue;
4776	    }
4777	  else
4778	    reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
4779	}
4780      if (GET_CODE (insn) == CALL_INSN)
4781	{
4782	  last_call = insn;
4783	  mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
4784	}
4785      mark_use (PATTERN (insn), reg_set_block);
4786    }
4787  return 0;
4788}
4789