1/* Subroutines for insn-output.c for SPARC.
2   Copyright (C) 1987-2015 Free Software Foundation, Inc.
3   Contributed by Michael Tiemann (tiemann@cygnus.com)
4   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5   at Cygnus Support.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 3, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "stringpool.h"
39#include "stor-layout.h"
40#include "calls.h"
41#include "varasm.h"
42#include "rtl.h"
43#include "regs.h"
44#include "hard-reg-set.h"
45#include "insn-config.h"
46#include "insn-codes.h"
47#include "conditions.h"
48#include "output.h"
49#include "insn-attr.h"
50#include "flags.h"
51#include "function.h"
52#include "except.h"
53#include "hashtab.h"
54#include "statistics.h"
55#include "real.h"
56#include "fixed-value.h"
57#include "expmed.h"
58#include "dojump.h"
59#include "explow.h"
60#include "emit-rtl.h"
61#include "stmt.h"
62#include "expr.h"
63#include "optabs.h"
64#include "recog.h"
65#include "diagnostic-core.h"
66#include "ggc.h"
67#include "tm_p.h"
68#include "debug.h"
69#include "target.h"
70#include "target-def.h"
71#include "common/common-target.h"
72#include "hash-table.h"
73#include "predict.h"
74#include "dominance.h"
75#include "cfg.h"
76#include "cfgrtl.h"
77#include "cfganal.h"
78#include "lcm.h"
79#include "cfgbuild.h"
80#include "cfgcleanup.h"
81#include "basic-block.h"
82#include "tree-ssa-alias.h"
83#include "internal-fn.h"
84#include "gimple-fold.h"
85#include "tree-eh.h"
86#include "gimple-expr.h"
87#include "is-a.h"
88#include "gimple.h"
89#include "gimplify.h"
90#include "langhooks.h"
91#include "reload.h"
92#include "params.h"
93#include "df.h"
94#include "opts.h"
95#include "tree-pass.h"
96#include "context.h"
97#include "builtins.h"
98#include "rtl-iter.h"
99
100/* Processor costs */
101
102struct processor_costs {
103  /* Integer load */
104  const int int_load;
105
106  /* Integer signed load */
107  const int int_sload;
108
109  /* Integer zeroed load */
110  const int int_zload;
111
112  /* Float load */
113  const int float_load;
114
115  /* fmov, fneg, fabs */
116  const int float_move;
117
118  /* fadd, fsub */
119  const int float_plusminus;
120
121  /* fcmp */
122  const int float_cmp;
123
124  /* fmov, fmovr */
125  const int float_cmove;
126
127  /* fmul */
128  const int float_mul;
129
130  /* fdivs */
131  const int float_div_sf;
132
133  /* fdivd */
134  const int float_div_df;
135
136  /* fsqrts */
137  const int float_sqrt_sf;
138
139  /* fsqrtd */
140  const int float_sqrt_df;
141
142  /* umul/smul */
143  const int int_mul;
144
145  /* mulX */
146  const int int_mulX;
147
148  /* integer multiply cost for each bit set past the most
149     significant 3, so the formula for multiply cost becomes:
150
151	if (rs1 < 0)
152	  highest_bit = highest_clear_bit(rs1);
153	else
154	  highest_bit = highest_set_bit(rs1);
155	if (highest_bit < 3)
156	  highest_bit = 3;
157	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
158
159     A value of zero indicates that the multiply costs is fixed,
160     and not variable.  */
161  const int int_mul_bit_factor;
162
163  /* udiv/sdiv */
164  const int int_div;
165
166  /* divX */
167  const int int_divX;
168
169  /* movcc, movr */
170  const int int_cmove;
171
172  /* penalty for shifts, due to scheduling rules etc. */
173  const int shift_penalty;
174};
175
176static const
177struct processor_costs cypress_costs = {
178  COSTS_N_INSNS (2), /* int load */
179  COSTS_N_INSNS (2), /* int signed load */
180  COSTS_N_INSNS (2), /* int zeroed load */
181  COSTS_N_INSNS (2), /* float load */
182  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
183  COSTS_N_INSNS (5), /* fadd, fsub */
184  COSTS_N_INSNS (1), /* fcmp */
185  COSTS_N_INSNS (1), /* fmov, fmovr */
186  COSTS_N_INSNS (7), /* fmul */
187  COSTS_N_INSNS (37), /* fdivs */
188  COSTS_N_INSNS (37), /* fdivd */
189  COSTS_N_INSNS (63), /* fsqrts */
190  COSTS_N_INSNS (63), /* fsqrtd */
191  COSTS_N_INSNS (1), /* imul */
192  COSTS_N_INSNS (1), /* imulX */
193  0, /* imul bit factor */
194  COSTS_N_INSNS (1), /* idiv */
195  COSTS_N_INSNS (1), /* idivX */
196  COSTS_N_INSNS (1), /* movcc/movr */
197  0, /* shift penalty */
198};
199
200static const
201struct processor_costs supersparc_costs = {
202  COSTS_N_INSNS (1), /* int load */
203  COSTS_N_INSNS (1), /* int signed load */
204  COSTS_N_INSNS (1), /* int zeroed load */
205  COSTS_N_INSNS (0), /* float load */
206  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
207  COSTS_N_INSNS (3), /* fadd, fsub */
208  COSTS_N_INSNS (3), /* fcmp */
209  COSTS_N_INSNS (1), /* fmov, fmovr */
210  COSTS_N_INSNS (3), /* fmul */
211  COSTS_N_INSNS (6), /* fdivs */
212  COSTS_N_INSNS (9), /* fdivd */
213  COSTS_N_INSNS (12), /* fsqrts */
214  COSTS_N_INSNS (12), /* fsqrtd */
215  COSTS_N_INSNS (4), /* imul */
216  COSTS_N_INSNS (4), /* imulX */
217  0, /* imul bit factor */
218  COSTS_N_INSNS (4), /* idiv */
219  COSTS_N_INSNS (4), /* idivX */
220  COSTS_N_INSNS (1), /* movcc/movr */
221  1, /* shift penalty */
222};
223
224static const
225struct processor_costs hypersparc_costs = {
226  COSTS_N_INSNS (1), /* int load */
227  COSTS_N_INSNS (1), /* int signed load */
228  COSTS_N_INSNS (1), /* int zeroed load */
229  COSTS_N_INSNS (1), /* float load */
230  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
231  COSTS_N_INSNS (1), /* fadd, fsub */
232  COSTS_N_INSNS (1), /* fcmp */
233  COSTS_N_INSNS (1), /* fmov, fmovr */
234  COSTS_N_INSNS (1), /* fmul */
235  COSTS_N_INSNS (8), /* fdivs */
236  COSTS_N_INSNS (12), /* fdivd */
237  COSTS_N_INSNS (17), /* fsqrts */
238  COSTS_N_INSNS (17), /* fsqrtd */
239  COSTS_N_INSNS (17), /* imul */
240  COSTS_N_INSNS (17), /* imulX */
241  0, /* imul bit factor */
242  COSTS_N_INSNS (17), /* idiv */
243  COSTS_N_INSNS (17), /* idivX */
244  COSTS_N_INSNS (1), /* movcc/movr */
245  0, /* shift penalty */
246};
247
248static const
249struct processor_costs leon_costs = {
250  COSTS_N_INSNS (1), /* int load */
251  COSTS_N_INSNS (1), /* int signed load */
252  COSTS_N_INSNS (1), /* int zeroed load */
253  COSTS_N_INSNS (1), /* float load */
254  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
255  COSTS_N_INSNS (1), /* fadd, fsub */
256  COSTS_N_INSNS (1), /* fcmp */
257  COSTS_N_INSNS (1), /* fmov, fmovr */
258  COSTS_N_INSNS (1), /* fmul */
259  COSTS_N_INSNS (15), /* fdivs */
260  COSTS_N_INSNS (15), /* fdivd */
261  COSTS_N_INSNS (23), /* fsqrts */
262  COSTS_N_INSNS (23), /* fsqrtd */
263  COSTS_N_INSNS (5), /* imul */
264  COSTS_N_INSNS (5), /* imulX */
265  0, /* imul bit factor */
266  COSTS_N_INSNS (5), /* idiv */
267  COSTS_N_INSNS (5), /* idivX */
268  COSTS_N_INSNS (1), /* movcc/movr */
269  0, /* shift penalty */
270};
271
272static const
273struct processor_costs leon3_costs = {
274  COSTS_N_INSNS (1), /* int load */
275  COSTS_N_INSNS (1), /* int signed load */
276  COSTS_N_INSNS (1), /* int zeroed load */
277  COSTS_N_INSNS (1), /* float load */
278  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279  COSTS_N_INSNS (1), /* fadd, fsub */
280  COSTS_N_INSNS (1), /* fcmp */
281  COSTS_N_INSNS (1), /* fmov, fmovr */
282  COSTS_N_INSNS (1), /* fmul */
283  COSTS_N_INSNS (14), /* fdivs */
284  COSTS_N_INSNS (15), /* fdivd */
285  COSTS_N_INSNS (22), /* fsqrts */
286  COSTS_N_INSNS (23), /* fsqrtd */
287  COSTS_N_INSNS (5), /* imul */
288  COSTS_N_INSNS (5), /* imulX */
289  0, /* imul bit factor */
290  COSTS_N_INSNS (35), /* idiv */
291  COSTS_N_INSNS (35), /* idivX */
292  COSTS_N_INSNS (1), /* movcc/movr */
293  0, /* shift penalty */
294};
295
296static const
297struct processor_costs sparclet_costs = {
298  COSTS_N_INSNS (3), /* int load */
299  COSTS_N_INSNS (3), /* int signed load */
300  COSTS_N_INSNS (1), /* int zeroed load */
301  COSTS_N_INSNS (1), /* float load */
302  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
303  COSTS_N_INSNS (1), /* fadd, fsub */
304  COSTS_N_INSNS (1), /* fcmp */
305  COSTS_N_INSNS (1), /* fmov, fmovr */
306  COSTS_N_INSNS (1), /* fmul */
307  COSTS_N_INSNS (1), /* fdivs */
308  COSTS_N_INSNS (1), /* fdivd */
309  COSTS_N_INSNS (1), /* fsqrts */
310  COSTS_N_INSNS (1), /* fsqrtd */
311  COSTS_N_INSNS (5), /* imul */
312  COSTS_N_INSNS (5), /* imulX */
313  0, /* imul bit factor */
314  COSTS_N_INSNS (5), /* idiv */
315  COSTS_N_INSNS (5), /* idivX */
316  COSTS_N_INSNS (1), /* movcc/movr */
317  0, /* shift penalty */
318};
319
320static const
321struct processor_costs ultrasparc_costs = {
322  COSTS_N_INSNS (2), /* int load */
323  COSTS_N_INSNS (3), /* int signed load */
324  COSTS_N_INSNS (2), /* int zeroed load */
325  COSTS_N_INSNS (2), /* float load */
326  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
327  COSTS_N_INSNS (4), /* fadd, fsub */
328  COSTS_N_INSNS (1), /* fcmp */
329  COSTS_N_INSNS (2), /* fmov, fmovr */
330  COSTS_N_INSNS (4), /* fmul */
331  COSTS_N_INSNS (13), /* fdivs */
332  COSTS_N_INSNS (23), /* fdivd */
333  COSTS_N_INSNS (13), /* fsqrts */
334  COSTS_N_INSNS (23), /* fsqrtd */
335  COSTS_N_INSNS (4), /* imul */
336  COSTS_N_INSNS (4), /* imulX */
337  2, /* imul bit factor */
338  COSTS_N_INSNS (37), /* idiv */
339  COSTS_N_INSNS (68), /* idivX */
340  COSTS_N_INSNS (2), /* movcc/movr */
341  2, /* shift penalty */
342};
343
344static const
345struct processor_costs ultrasparc3_costs = {
346  COSTS_N_INSNS (2), /* int load */
347  COSTS_N_INSNS (3), /* int signed load */
348  COSTS_N_INSNS (3), /* int zeroed load */
349  COSTS_N_INSNS (2), /* float load */
350  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
351  COSTS_N_INSNS (4), /* fadd, fsub */
352  COSTS_N_INSNS (5), /* fcmp */
353  COSTS_N_INSNS (3), /* fmov, fmovr */
354  COSTS_N_INSNS (4), /* fmul */
355  COSTS_N_INSNS (17), /* fdivs */
356  COSTS_N_INSNS (20), /* fdivd */
357  COSTS_N_INSNS (20), /* fsqrts */
358  COSTS_N_INSNS (29), /* fsqrtd */
359  COSTS_N_INSNS (6), /* imul */
360  COSTS_N_INSNS (6), /* imulX */
361  0, /* imul bit factor */
362  COSTS_N_INSNS (40), /* idiv */
363  COSTS_N_INSNS (71), /* idivX */
364  COSTS_N_INSNS (2), /* movcc/movr */
365  0, /* shift penalty */
366};
367
368static const
369struct processor_costs niagara_costs = {
370  COSTS_N_INSNS (3), /* int load */
371  COSTS_N_INSNS (3), /* int signed load */
372  COSTS_N_INSNS (3), /* int zeroed load */
373  COSTS_N_INSNS (9), /* float load */
374  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
375  COSTS_N_INSNS (8), /* fadd, fsub */
376  COSTS_N_INSNS (26), /* fcmp */
377  COSTS_N_INSNS (8), /* fmov, fmovr */
378  COSTS_N_INSNS (29), /* fmul */
379  COSTS_N_INSNS (54), /* fdivs */
380  COSTS_N_INSNS (83), /* fdivd */
381  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
382  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
383  COSTS_N_INSNS (11), /* imul */
384  COSTS_N_INSNS (11), /* imulX */
385  0, /* imul bit factor */
386  COSTS_N_INSNS (72), /* idiv */
387  COSTS_N_INSNS (72), /* idivX */
388  COSTS_N_INSNS (1), /* movcc/movr */
389  0, /* shift penalty */
390};
391
392static const
393struct processor_costs niagara2_costs = {
394  COSTS_N_INSNS (3), /* int load */
395  COSTS_N_INSNS (3), /* int signed load */
396  COSTS_N_INSNS (3), /* int zeroed load */
397  COSTS_N_INSNS (3), /* float load */
398  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
399  COSTS_N_INSNS (6), /* fadd, fsub */
400  COSTS_N_INSNS (6), /* fcmp */
401  COSTS_N_INSNS (6), /* fmov, fmovr */
402  COSTS_N_INSNS (6), /* fmul */
403  COSTS_N_INSNS (19), /* fdivs */
404  COSTS_N_INSNS (33), /* fdivd */
405  COSTS_N_INSNS (19), /* fsqrts */
406  COSTS_N_INSNS (33), /* fsqrtd */
407  COSTS_N_INSNS (5), /* imul */
408  COSTS_N_INSNS (5), /* imulX */
409  0, /* imul bit factor */
410  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
411  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
412  COSTS_N_INSNS (1), /* movcc/movr */
413  0, /* shift penalty */
414};
415
416static const
417struct processor_costs niagara3_costs = {
418  COSTS_N_INSNS (3), /* int load */
419  COSTS_N_INSNS (3), /* int signed load */
420  COSTS_N_INSNS (3), /* int zeroed load */
421  COSTS_N_INSNS (3), /* float load */
422  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
423  COSTS_N_INSNS (9), /* fadd, fsub */
424  COSTS_N_INSNS (9), /* fcmp */
425  COSTS_N_INSNS (9), /* fmov, fmovr */
426  COSTS_N_INSNS (9), /* fmul */
427  COSTS_N_INSNS (23), /* fdivs */
428  COSTS_N_INSNS (37), /* fdivd */
429  COSTS_N_INSNS (23), /* fsqrts */
430  COSTS_N_INSNS (37), /* fsqrtd */
431  COSTS_N_INSNS (9), /* imul */
432  COSTS_N_INSNS (9), /* imulX */
433  0, /* imul bit factor */
434  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
435  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
436  COSTS_N_INSNS (1), /* movcc/movr */
437  0, /* shift penalty */
438};
439
440static const
441struct processor_costs niagara4_costs = {
442  COSTS_N_INSNS (5), /* int load */
443  COSTS_N_INSNS (5), /* int signed load */
444  COSTS_N_INSNS (5), /* int zeroed load */
445  COSTS_N_INSNS (5), /* float load */
446  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
447  COSTS_N_INSNS (11), /* fadd, fsub */
448  COSTS_N_INSNS (11), /* fcmp */
449  COSTS_N_INSNS (11), /* fmov, fmovr */
450  COSTS_N_INSNS (11), /* fmul */
451  COSTS_N_INSNS (24), /* fdivs */
452  COSTS_N_INSNS (37), /* fdivd */
453  COSTS_N_INSNS (24), /* fsqrts */
454  COSTS_N_INSNS (37), /* fsqrtd */
455  COSTS_N_INSNS (12), /* imul */
456  COSTS_N_INSNS (12), /* imulX */
457  0, /* imul bit factor */
458  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
459  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
460  COSTS_N_INSNS (1), /* movcc/movr */
461  0, /* shift penalty */
462};
463
464static const struct processor_costs *sparc_costs = &cypress_costs;
465
466#ifdef HAVE_AS_RELAX_OPTION
467/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
468   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
469   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
470   somebody does not branch between the sethi and jmp.  */
471#define LEAF_SIBCALL_SLOT_RESERVED_P 1
472#else
473#define LEAF_SIBCALL_SLOT_RESERVED_P \
474  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
475#endif
476
477/* Vector to say how input registers are mapped to output registers.
478   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
479   eliminate it.  You must use -fomit-frame-pointer to get that.  */
480char leaf_reg_remap[] =
481{ 0, 1, 2, 3, 4, 5, 6, 7,
482  -1, -1, -1, -1, -1, -1, 14, -1,
483  -1, -1, -1, -1, -1, -1, -1, -1,
484  8, 9, 10, 11, 12, 13, -1, 15,
485
486  32, 33, 34, 35, 36, 37, 38, 39,
487  40, 41, 42, 43, 44, 45, 46, 47,
488  48, 49, 50, 51, 52, 53, 54, 55,
489  56, 57, 58, 59, 60, 61, 62, 63,
490  64, 65, 66, 67, 68, 69, 70, 71,
491  72, 73, 74, 75, 76, 77, 78, 79,
492  80, 81, 82, 83, 84, 85, 86, 87,
493  88, 89, 90, 91, 92, 93, 94, 95,
494  96, 97, 98, 99, 100, 101, 102};
495
496/* Vector, indexed by hard register number, which contains 1
497   for a register that is allowable in a candidate for leaf
498   function treatment.  */
499char sparc_leaf_regs[] =
500{ 1, 1, 1, 1, 1, 1, 1, 1,
501  0, 0, 0, 0, 0, 0, 1, 0,
502  0, 0, 0, 0, 0, 0, 0, 0,
503  1, 1, 1, 1, 1, 1, 0, 1,
504  1, 1, 1, 1, 1, 1, 1, 1,
505  1, 1, 1, 1, 1, 1, 1, 1,
506  1, 1, 1, 1, 1, 1, 1, 1,
507  1, 1, 1, 1, 1, 1, 1, 1,
508  1, 1, 1, 1, 1, 1, 1, 1,
509  1, 1, 1, 1, 1, 1, 1, 1,
510  1, 1, 1, 1, 1, 1, 1, 1,
511  1, 1, 1, 1, 1, 1, 1, 1,
512  1, 1, 1, 1, 1, 1, 1};
513
514struct GTY(()) machine_function
515{
516  /* Size of the frame of the function.  */
517  HOST_WIDE_INT frame_size;
518
519  /* Size of the frame of the function minus the register window save area
520     and the outgoing argument area.  */
521  HOST_WIDE_INT apparent_frame_size;
522
523  /* Register we pretend the frame pointer is allocated to.  Normally, this
524     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
525     record "offset" separately as it may be too big for (reg + disp).  */
526  rtx frame_base_reg;
527  HOST_WIDE_INT frame_base_offset;
528
529  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
530  int n_global_fp_regs;
531
532  /* True if the current function is leaf and uses only leaf regs,
533     so that the SPARC leaf function optimization can be applied.
534     Private version of crtl->uses_only_leaf_regs, see
535     sparc_expand_prologue for the rationale.  */
536  int leaf_function_p;
537
538  /* True if the prologue saves local or in registers.  */
539  bool save_local_in_regs_p;
540
541  /* True if the data calculated by sparc_expand_prologue are valid.  */
542  bool prologue_data_valid_p;
543};
544
545#define sparc_frame_size		cfun->machine->frame_size
546#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
547#define sparc_frame_base_reg		cfun->machine->frame_base_reg
548#define sparc_frame_base_offset		cfun->machine->frame_base_offset
549#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
550#define sparc_leaf_function_p		cfun->machine->leaf_function_p
551#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
552#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p
553
554/* 1 if the next opcode is to be specially indented.  */
555int sparc_indent_opcode = 0;
556
557static void sparc_option_override (void);
558static void sparc_init_modes (void);
559static void scan_record_type (const_tree, int *, int *, int *);
560static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
561				const_tree, bool, bool, int *, int *);
562
563static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
564static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
565
566static void sparc_emit_set_const32 (rtx, rtx);
567static void sparc_emit_set_const64 (rtx, rtx);
568static void sparc_output_addr_vec (rtx);
569static void sparc_output_addr_diff_vec (rtx);
570static void sparc_output_deferred_case_vectors (void);
571static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
572static bool sparc_legitimate_constant_p (machine_mode, rtx);
573static rtx sparc_builtin_saveregs (void);
574static int epilogue_renumber (rtx *, int);
575static bool sparc_assemble_integer (rtx, unsigned int, int);
576static int set_extends (rtx_insn *);
577static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
578static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
579#ifdef TARGET_SOLARIS
580static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
581						 tree) ATTRIBUTE_UNUSED;
582#endif
583static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
584static int sparc_issue_rate (void);
585static void sparc_sched_init (FILE *, int, int);
586static int sparc_use_sched_lookahead (void);
587
588static void emit_soft_tfmode_libcall (const char *, int, rtx *);
589static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
590static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
591static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
592static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
593
594static bool sparc_function_ok_for_sibcall (tree, tree);
595static void sparc_init_libfuncs (void);
596static void sparc_init_builtins (void);
597static void sparc_fpu_init_builtins (void);
598static void sparc_vis_init_builtins (void);
599static tree sparc_builtin_decl (unsigned, bool);
600static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
601static tree sparc_fold_builtin (tree, int, tree *, bool);
602static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
603				   HOST_WIDE_INT, tree);
604static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
605				       HOST_WIDE_INT, const_tree);
606static struct machine_function * sparc_init_machine_status (void);
607static bool sparc_cannot_force_const_mem (machine_mode, rtx);
608static rtx sparc_tls_get_addr (void);
609static rtx sparc_tls_got (void);
610static int sparc_register_move_cost (machine_mode,
611				     reg_class_t, reg_class_t);
612static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
613static rtx sparc_function_value (const_tree, const_tree, bool);
614static rtx sparc_libcall_value (machine_mode, const_rtx);
615static bool sparc_function_value_regno_p (const unsigned int);
616static rtx sparc_struct_value_rtx (tree, int);
617static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
618						      int *, const_tree, int);
619static bool sparc_return_in_memory (const_tree, const_tree);
620static bool sparc_strict_argument_naming (cumulative_args_t);
621static void sparc_va_start (tree, rtx);
622static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
623static bool sparc_vector_mode_supported_p (machine_mode);
624static bool sparc_tls_referenced_p (rtx);
625static rtx sparc_legitimize_tls_address (rtx);
626static rtx sparc_legitimize_pic_address (rtx, rtx);
627static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
628static rtx sparc_delegitimize_address (rtx);
629static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
630static bool sparc_pass_by_reference (cumulative_args_t,
631				     machine_mode, const_tree, bool);
632static void sparc_function_arg_advance (cumulative_args_t,
633					machine_mode, const_tree, bool);
634static rtx sparc_function_arg_1 (cumulative_args_t,
635				 machine_mode, const_tree, bool, bool);
636static rtx sparc_function_arg (cumulative_args_t,
637			       machine_mode, const_tree, bool);
638static rtx sparc_function_incoming_arg (cumulative_args_t,
639					machine_mode, const_tree, bool);
640static unsigned int sparc_function_arg_boundary (machine_mode,
641						 const_tree);
642static int sparc_arg_partial_bytes (cumulative_args_t,
643				    machine_mode, tree, bool);
644static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
645static void sparc_file_end (void);
646static bool sparc_frame_pointer_required (void);
647static bool sparc_can_eliminate (const int, const int);
648static rtx sparc_builtin_setjmp_frame_value (void);
649static void sparc_conditional_register_usage (void);
650#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
651static const char *sparc_mangle_type (const_tree);
652#endif
653static void sparc_trampoline_init (rtx, tree, rtx);
654static machine_mode sparc_preferred_simd_mode (machine_mode);
655static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
656static bool sparc_print_operand_punct_valid_p (unsigned char);
657static void sparc_print_operand (FILE *, rtx, int);
658static void sparc_print_operand_address (FILE *, rtx);
659static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
660					   machine_mode,
661					   secondary_reload_info *);
662static machine_mode sparc_cstore_mode (enum insn_code icode);
663static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
664
665#ifdef SUBTARGET_ATTRIBUTE_TABLE
666/* Table of valid machine attributes.  */
667static const struct attribute_spec sparc_attribute_table[] =
668{
669  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
670       do_diagnostic } */
671  SUBTARGET_ATTRIBUTE_TABLE,
672  { NULL,        0, 0, false, false, false, NULL, false }
673};
674#endif
675
676/* Option handling.  */
677
678/* Parsed value.  */
679enum cmodel sparc_cmodel;
680
681char sparc_hard_reg_printed[8];
682
683/* Initialize the GCC target structure.  */
684
685/* The default is to use .half rather than .short for aligned HI objects.  */
686#undef TARGET_ASM_ALIGNED_HI_OP
687#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
688
689#undef TARGET_ASM_UNALIGNED_HI_OP
690#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
691#undef TARGET_ASM_UNALIGNED_SI_OP
692#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
693#undef TARGET_ASM_UNALIGNED_DI_OP
694#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
695
696/* The target hook has to handle DI-mode values.  */
697#undef TARGET_ASM_INTEGER
698#define TARGET_ASM_INTEGER sparc_assemble_integer
699
700#undef TARGET_ASM_FUNCTION_PROLOGUE
701#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
702#undef TARGET_ASM_FUNCTION_EPILOGUE
703#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
704
705#undef TARGET_SCHED_ADJUST_COST
706#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
707#undef TARGET_SCHED_ISSUE_RATE
708#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
709#undef TARGET_SCHED_INIT
710#define TARGET_SCHED_INIT sparc_sched_init
711#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
712#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
713
714#undef TARGET_FUNCTION_OK_FOR_SIBCALL
715#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
716
717#undef TARGET_INIT_LIBFUNCS
718#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
719
720#undef TARGET_LEGITIMIZE_ADDRESS
721#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
722#undef TARGET_DELEGITIMIZE_ADDRESS
723#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
724#undef TARGET_MODE_DEPENDENT_ADDRESS_P
725#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
726
727#undef TARGET_INIT_BUILTINS
728#define TARGET_INIT_BUILTINS sparc_init_builtins
729#undef TARGET_BUILTIN_DECL
730#define TARGET_BUILTIN_DECL sparc_builtin_decl
731#undef TARGET_EXPAND_BUILTIN
732#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
733#undef TARGET_FOLD_BUILTIN
734#define TARGET_FOLD_BUILTIN sparc_fold_builtin
735
736#if TARGET_TLS
737#undef TARGET_HAVE_TLS
738#define TARGET_HAVE_TLS true
739#endif
740
741#undef TARGET_CANNOT_FORCE_CONST_MEM
742#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
743
744#undef TARGET_ASM_OUTPUT_MI_THUNK
745#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
746#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
747#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
748
749#undef TARGET_RTX_COSTS
750#define TARGET_RTX_COSTS sparc_rtx_costs
751#undef TARGET_ADDRESS_COST
752#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
753#undef TARGET_REGISTER_MOVE_COST
754#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
755
756#undef TARGET_PROMOTE_FUNCTION_MODE
757#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
758
759#undef TARGET_FUNCTION_VALUE
760#define TARGET_FUNCTION_VALUE sparc_function_value
761#undef TARGET_LIBCALL_VALUE
762#define TARGET_LIBCALL_VALUE sparc_libcall_value
763#undef TARGET_FUNCTION_VALUE_REGNO_P
764#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
765
766#undef TARGET_STRUCT_VALUE_RTX
767#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
768#undef TARGET_RETURN_IN_MEMORY
769#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
770#undef TARGET_MUST_PASS_IN_STACK
771#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
772#undef TARGET_PASS_BY_REFERENCE
773#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
774#undef TARGET_ARG_PARTIAL_BYTES
775#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
776#undef TARGET_FUNCTION_ARG_ADVANCE
777#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
778#undef TARGET_FUNCTION_ARG
779#define TARGET_FUNCTION_ARG sparc_function_arg
780#undef TARGET_FUNCTION_INCOMING_ARG
781#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
782#undef TARGET_FUNCTION_ARG_BOUNDARY
783#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
784
785#undef TARGET_EXPAND_BUILTIN_SAVEREGS
786#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
787#undef TARGET_STRICT_ARGUMENT_NAMING
788#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
789
790#undef TARGET_EXPAND_BUILTIN_VA_START
791#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
792#undef TARGET_GIMPLIFY_VA_ARG_EXPR
793#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
794
795#undef TARGET_VECTOR_MODE_SUPPORTED_P
796#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
797
798#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
799#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
800
801#ifdef SUBTARGET_INSERT_ATTRIBUTES
802#undef TARGET_INSERT_ATTRIBUTES
803#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
804#endif
805
806#ifdef SUBTARGET_ATTRIBUTE_TABLE
807#undef TARGET_ATTRIBUTE_TABLE
808#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
809#endif
810
811#undef TARGET_RELAXED_ORDERING
812#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
813
814#undef TARGET_OPTION_OVERRIDE
815#define TARGET_OPTION_OVERRIDE sparc_option_override
816
817#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
818#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
819#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
820#endif
821
822#undef TARGET_ASM_FILE_END
823#define TARGET_ASM_FILE_END sparc_file_end
824
825#undef TARGET_FRAME_POINTER_REQUIRED
826#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
827
828#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
829#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
830
831#undef TARGET_CAN_ELIMINATE
832#define TARGET_CAN_ELIMINATE sparc_can_eliminate
833
834#undef  TARGET_PREFERRED_RELOAD_CLASS
835#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
836
837#undef TARGET_SECONDARY_RELOAD
838#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
839
840#undef TARGET_CONDITIONAL_REGISTER_USAGE
841#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
842
843#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
844#undef TARGET_MANGLE_TYPE
845#define TARGET_MANGLE_TYPE sparc_mangle_type
846#endif
847
848#undef TARGET_LEGITIMATE_ADDRESS_P
849#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
850
851#undef TARGET_LEGITIMATE_CONSTANT_P
852#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
853
854#undef TARGET_TRAMPOLINE_INIT
855#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
856
857#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
858#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
859#undef TARGET_PRINT_OPERAND
860#define TARGET_PRINT_OPERAND sparc_print_operand
861#undef TARGET_PRINT_OPERAND_ADDRESS
862#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
863
864/* The value stored by LDSTUB.  */
865#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
866#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
867
868#undef TARGET_CSTORE_MODE
869#define TARGET_CSTORE_MODE sparc_cstore_mode
870
871#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
872#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
873
874struct gcc_target targetm = TARGET_INITIALIZER;
875
876/* Return the memory reference contained in X if any, zero otherwise.  */
877
878static rtx
879mem_ref (rtx x)
880{
881  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
882    x = XEXP (x, 0);
883
884  if (MEM_P (x))
885    return x;
886
887  return NULL_RTX;
888}
889
890/* We use a machine specific pass to enable workarounds for errata.
891   We need to have the (essentially) final form of the insn stream in order
892   to properly detect the various hazards.  Therefore, this machine specific
893   pass runs as late as possible.  The pass is inserted in the pass pipeline
894   at the end of sparc_option_override.  */
895
896static unsigned int
897sparc_do_work_around_errata (void)
898{
899  rtx_insn *insn, *next;
900
901  /* Force all instructions to be split into their final form.  */
902  split_all_insns_noflow ();
903
904  /* Now look for specific patterns in the insn stream.  */
905  for (insn = get_insns (); insn; insn = next)
906    {
907      bool insert_nop = false;
908      rtx set;
909
910      /* Look into the instruction in a delay slot.  */
911      if (NONJUMP_INSN_P (insn))
912	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
913	  insn = seq->insn (1);
914
915      /* Look for a single-word load into an odd-numbered FP register.  */
916      if (sparc_fix_at697f
917	  && NONJUMP_INSN_P (insn)
918	  && (set = single_set (insn)) != NULL_RTX
919	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
920	  && MEM_P (SET_SRC (set))
921	  && REG_P (SET_DEST (set))
922	  && REGNO (SET_DEST (set)) > 31
923	  && REGNO (SET_DEST (set)) % 2 != 0)
924	{
925	  /* The wrong dependency is on the enclosing double register.  */
926	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
927	  unsigned int src1, src2, dest;
928	  int code;
929
930	  next = next_active_insn (insn);
931	  if (!next)
932	    break;
933	  /* If the insn is a branch, then it cannot be problematic.  */
934	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
935	    continue;
936
937	  extract_insn (next);
938	  code = INSN_CODE (next);
939
940	  switch (code)
941	    {
942	    case CODE_FOR_adddf3:
943	    case CODE_FOR_subdf3:
944	    case CODE_FOR_muldf3:
945	    case CODE_FOR_divdf3:
946	      dest = REGNO (recog_data.operand[0]);
947	      src1 = REGNO (recog_data.operand[1]);
948	      src2 = REGNO (recog_data.operand[2]);
949	      if (src1 != src2)
950		{
951		  /* Case [1-4]:
952				 ld [address], %fx+1
953				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
954		  if ((src1 == x || src2 == x)
955		      && (dest == src1 || dest == src2))
956		    insert_nop = true;
957		}
958	      else
959		{
960		  /* Case 5:
961			     ld [address], %fx+1
962			     FPOPd %fx, %fx, %fx  */
963		  if (src1 == x
964		      && dest == src1
965		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
966		    insert_nop = true;
967		}
968	      break;
969
970	    case CODE_FOR_sqrtdf2:
971	      dest = REGNO (recog_data.operand[0]);
972	      src1 = REGNO (recog_data.operand[1]);
973	      /* Case 6:
974			 ld [address], %fx+1
975			 fsqrtd %fx, %fx  */
976	      if (src1 == x && dest == src1)
977		insert_nop = true;
978	      break;
979
980	    default:
981	      break;
982	    }
983	}
984
985      /* Look for a single-word load into an integer register.  */
986      else if (sparc_fix_ut699
987	       && NONJUMP_INSN_P (insn)
988	       && (set = single_set (insn)) != NULL_RTX
989	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
990	       && mem_ref (SET_SRC (set)) != NULL_RTX
991	       && REG_P (SET_DEST (set))
992	       && REGNO (SET_DEST (set)) < 32)
993	{
994	  /* There is no problem if the second memory access has a data
995	     dependency on the first single-cycle load.  */
996	  rtx x = SET_DEST (set);
997
998	  next = next_active_insn (insn);
999	  if (!next)
1000	    break;
1001	  /* If the insn is a branch, then it cannot be problematic.  */
1002	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1003	    continue;
1004
1005	  /* Look for a second memory access to/from an integer register.  */
1006	  if ((set = single_set (next)) != NULL_RTX)
1007	    {
1008	      rtx src = SET_SRC (set);
1009	      rtx dest = SET_DEST (set);
1010	      rtx mem;
1011
1012	      /* LDD is affected.  */
1013	      if ((mem = mem_ref (src)) != NULL_RTX
1014		  && REG_P (dest)
1015		  && REGNO (dest) < 32
1016		  && !reg_mentioned_p (x, XEXP (mem, 0)))
1017		insert_nop = true;
1018
1019	      /* STD is *not* affected.  */
1020	      else if (MEM_P (dest)
1021		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1022		       && (src == CONST0_RTX (GET_MODE (dest))
1023			   || (REG_P (src)
1024			       && REGNO (src) < 32
1025			       && REGNO (src) != REGNO (x)))
1026		       && !reg_mentioned_p (x, XEXP (dest, 0)))
1027		insert_nop = true;
1028	    }
1029	}
1030
1031      /* Look for a single-word load/operation into an FP register.  */
1032      else if (sparc_fix_ut699
1033	       && NONJUMP_INSN_P (insn)
1034	       && (set = single_set (insn)) != NULL_RTX
1035	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1036	       && REG_P (SET_DEST (set))
1037	       && REGNO (SET_DEST (set)) > 31)
1038	{
1039	  /* Number of instructions in the problematic window.  */
1040	  const int n_insns = 4;
1041	  /* The problematic combination is with the sibling FP register.  */
1042	  const unsigned int x = REGNO (SET_DEST (set));
1043	  const unsigned int y = x ^ 1;
1044	  rtx_insn *after;
1045	  int i;
1046
1047	  next = next_active_insn (insn);
1048	  if (!next)
1049	    break;
1050	  /* If the insn is a branch, then it cannot be problematic.  */
1051	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1052	    continue;
1053
1054	  /* Look for a second load/operation into the sibling FP register.  */
1055	  if (!((set = single_set (next)) != NULL_RTX
1056		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1057		&& REG_P (SET_DEST (set))
1058		&& REGNO (SET_DEST (set)) == y))
1059	    continue;
1060
1061	  /* Look for a (possible) store from the FP register in the next N
1062	     instructions, but bail out if it is again modified or if there
1063	     is a store from the sibling FP register before this store.  */
1064	  for (after = next, i = 0; i < n_insns; i++)
1065	    {
1066	      bool branch_p;
1067
1068	      after = next_active_insn (after);
1069	      if (!after)
1070		break;
1071
1072	      /* This is a branch with an empty delay slot.  */
1073	      if (!NONJUMP_INSN_P (after))
1074		{
1075		  if (++i == n_insns)
1076		    break;
1077		  branch_p = true;
1078		  after = NULL;
1079		}
1080	      /* This is a branch with a filled delay slot.  */
1081	      else if (rtx_sequence *seq =
1082		         dyn_cast <rtx_sequence *> (PATTERN (after)))
1083		{
1084		  if (++i == n_insns)
1085		    break;
1086		  branch_p = true;
1087		  after = seq->insn (1);
1088		}
1089	      /* This is a regular instruction.  */
1090	      else
1091		branch_p = false;
1092
1093	      if (after && (set = single_set (after)) != NULL_RTX)
1094		{
1095		  const rtx src = SET_SRC (set);
1096		  const rtx dest = SET_DEST (set);
1097		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1098
1099		  /* If the FP register is again modified before the store,
1100		     then the store isn't affected.  */
1101		  if (REG_P (dest)
1102		      && (REGNO (dest) == x
1103			  || (REGNO (dest) == y && size == 8)))
1104		    break;
1105
1106		  if (MEM_P (dest) && REG_P (src))
1107		    {
1108		      /* If there is a store from the sibling FP register
1109			 before the store, then the store is not affected.  */
1110		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1111			break;
1112
1113		      /* Otherwise, the store is affected.  */
1114		      if (REGNO (src) == x && size == 4)
1115			{
1116			  insert_nop = true;
1117			  break;
1118			}
1119		    }
1120		}
1121
1122	      /* If we have a branch in the first M instructions, then we
1123		 cannot see the (M+2)th instruction so we play safe.  */
1124	      if (branch_p && i <= (n_insns - 2))
1125		{
1126		  insert_nop = true;
1127		  break;
1128		}
1129	    }
1130	}
1131
1132      else
1133	next = NEXT_INSN (insn);
1134
1135      if (insert_nop)
1136	emit_insn_before (gen_nop (), next);
1137    }
1138
1139  return 0;
1140}
1141
1142namespace {
1143
1144const pass_data pass_data_work_around_errata =
1145{
1146  RTL_PASS, /* type */
1147  "errata", /* name */
1148  OPTGROUP_NONE, /* optinfo_flags */
1149  TV_MACH_DEP, /* tv_id */
1150  0, /* properties_required */
1151  0, /* properties_provided */
1152  0, /* properties_destroyed */
1153  0, /* todo_flags_start */
1154  0, /* todo_flags_finish */
1155};
1156
1157class pass_work_around_errata : public rtl_opt_pass
1158{
1159public:
1160  pass_work_around_errata(gcc::context *ctxt)
1161    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1162  {}
1163
1164  /* opt_pass methods: */
1165  virtual bool gate (function *)
1166    {
1167      /* The only errata we handle are those of the AT697F and UT699.  */
1168      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
1169    }
1170
1171  virtual unsigned int execute (function *)
1172    {
1173      return sparc_do_work_around_errata ();
1174    }
1175
1176}; // class pass_work_around_errata
1177
1178} // anon namespace
1179
1180rtl_opt_pass *
1181make_pass_work_around_errata (gcc::context *ctxt)
1182{
1183  return new pass_work_around_errata (ctxt);
1184}
1185
1186/* Helpers for TARGET_DEBUG_OPTIONS.  */
1187static void
1188dump_target_flag_bits (const int flags)
1189{
1190  if (flags & MASK_64BIT)
1191    fprintf (stderr, "64BIT ");
1192  if (flags & MASK_APP_REGS)
1193    fprintf (stderr, "APP_REGS ");
1194  if (flags & MASK_FASTER_STRUCTS)
1195    fprintf (stderr, "FASTER_STRUCTS ");
1196  if (flags & MASK_FLAT)
1197    fprintf (stderr, "FLAT ");
1198  if (flags & MASK_FMAF)
1199    fprintf (stderr, "FMAF ");
1200  if (flags & MASK_FPU)
1201    fprintf (stderr, "FPU ");
1202  if (flags & MASK_HARD_QUAD)
1203    fprintf (stderr, "HARD_QUAD ");
1204  if (flags & MASK_POPC)
1205    fprintf (stderr, "POPC ");
1206  if (flags & MASK_PTR64)
1207    fprintf (stderr, "PTR64 ");
1208  if (flags & MASK_STACK_BIAS)
1209    fprintf (stderr, "STACK_BIAS ");
1210  if (flags & MASK_UNALIGNED_DOUBLES)
1211    fprintf (stderr, "UNALIGNED_DOUBLES ");
1212  if (flags & MASK_V8PLUS)
1213    fprintf (stderr, "V8PLUS ");
1214  if (flags & MASK_VIS)
1215    fprintf (stderr, "VIS ");
1216  if (flags & MASK_VIS2)
1217    fprintf (stderr, "VIS2 ");
1218  if (flags & MASK_VIS3)
1219    fprintf (stderr, "VIS3 ");
1220  if (flags & MASK_CBCOND)
1221    fprintf (stderr, "CBCOND ");
1222  if (flags & MASK_DEPRECATED_V8_INSNS)
1223    fprintf (stderr, "DEPRECATED_V8_INSNS ");
1224  if (flags & MASK_SPARCLET)
1225    fprintf (stderr, "SPARCLET ");
1226  if (flags & MASK_SPARCLITE)
1227    fprintf (stderr, "SPARCLITE ");
1228  if (flags & MASK_V8)
1229    fprintf (stderr, "V8 ");
1230  if (flags & MASK_V9)
1231    fprintf (stderr, "V9 ");
1232}
1233
1234static void
1235dump_target_flags (const char *prefix, const int flags)
1236{
1237  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1238  dump_target_flag_bits (flags);
1239  fprintf(stderr, "]\n");
1240}
1241
1242/* Validate and override various options, and do some machine dependent
1243   initialization.  */
1244
1245static void
1246sparc_option_override (void)
1247{
1248  static struct code_model {
1249    const char *const name;
1250    const enum cmodel value;
1251  } const cmodels[] = {
1252    { "32", CM_32 },
1253    { "medlow", CM_MEDLOW },
1254    { "medmid", CM_MEDMID },
1255    { "medany", CM_MEDANY },
1256    { "embmedany", CM_EMBMEDANY },
1257    { NULL, (enum cmodel) 0 }
1258  };
1259  const struct code_model *cmodel;
1260  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
1261  static struct cpu_default {
1262    const int cpu;
1263    const enum processor_type processor;
1264  } const cpu_default[] = {
1265    /* There must be one entry here for each TARGET_CPU value.  */
1266    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1267    { TARGET_CPU_v8, PROCESSOR_V8 },
1268    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1269    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1270    { TARGET_CPU_leon, PROCESSOR_LEON },
1271    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1272    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1273    { TARGET_CPU_sparclite, PROCESSOR_F930 },
1274    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1275    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1276    { TARGET_CPU_v9, PROCESSOR_V9 },
1277    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1278    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1279    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1280    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1281    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1282    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1283    { -1, PROCESSOR_V7 }
1284  };
1285  const struct cpu_default *def;
1286  /* Table of values for -m{cpu,tune}=.  This must match the order of
1287     the enum processor_type in sparc-opts.h.  */
1288  static struct cpu_table {
1289    const char *const name;
1290    const int disable;
1291    const int enable;
1292  } const cpu_table[] = {
1293    { "v7",		MASK_ISA, 0 },
1294    { "cypress",	MASK_ISA, 0 },
1295    { "v8",		MASK_ISA, MASK_V8 },
1296    /* TI TMS390Z55 supersparc */
1297    { "supersparc",	MASK_ISA, MASK_V8 },
1298    { "hypersparc",	MASK_ISA, MASK_V8|MASK_FPU },
1299    { "leon",		MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
1300    { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
1301    { "leon3v7",	MASK_ISA, MASK_LEON3|MASK_FPU },
1302    { "sparclite",	MASK_ISA, MASK_SPARCLITE },
1303    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
1304    { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1305    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
1306    { "f934",		MASK_ISA, MASK_SPARCLITE|MASK_FPU },
1307    { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1308    { "sparclet",	MASK_ISA, MASK_SPARCLET },
1309    /* TEMIC sparclet */
1310    { "tsc701",		MASK_ISA, MASK_SPARCLET },
1311    { "v9",		MASK_ISA, MASK_V9 },
1312    /* UltraSPARC I, II, IIi */
1313    { "ultrasparc",	MASK_ISA,
1314    /* Although insns using %y are deprecated, it is a clear win.  */
1315      MASK_V9|MASK_DEPRECATED_V8_INSNS },
1316    /* UltraSPARC III */
1317    /* ??? Check if %y issue still holds true.  */
1318    { "ultrasparc3",	MASK_ISA,
1319      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1320    /* UltraSPARC T1 */
1321    { "niagara",	MASK_ISA,
1322      MASK_V9|MASK_DEPRECATED_V8_INSNS },
1323    /* UltraSPARC T2 */
1324    { "niagara2",	MASK_ISA,
1325      MASK_V9|MASK_POPC|MASK_VIS2 },
1326    /* UltraSPARC T3 */
1327    { "niagara3",	MASK_ISA,
1328      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
1329    /* UltraSPARC T4 */
1330    { "niagara4",	MASK_ISA,
1331      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1332  };
1333  const struct cpu_table *cpu;
1334  unsigned int i;
1335  int fpu;
1336
1337  if (sparc_debug_string != NULL)
1338    {
1339      const char *q;
1340      char *p;
1341
1342      p = ASTRDUP (sparc_debug_string);
1343      while ((q = strtok (p, ",")) != NULL)
1344	{
1345	  bool invert;
1346	  int mask;
1347
1348	  p = NULL;
1349	  if (*q == '!')
1350	    {
1351	      invert = true;
1352	      q++;
1353	    }
1354	  else
1355	    invert = false;
1356
1357	  if (! strcmp (q, "all"))
1358	    mask = MASK_DEBUG_ALL;
1359	  else if (! strcmp (q, "options"))
1360	    mask = MASK_DEBUG_OPTIONS;
1361	  else
1362	    error ("unknown -mdebug-%s switch", q);
1363
1364	  if (invert)
1365	    sparc_debug &= ~mask;
1366	  else
1367	    sparc_debug |= mask;
1368	}
1369    }
1370
1371  if (TARGET_DEBUG_OPTIONS)
1372    {
1373      dump_target_flags("Initial target_flags", target_flags);
1374      dump_target_flags("target_flags_explicit", target_flags_explicit);
1375    }
1376
1377#ifdef SUBTARGET_OVERRIDE_OPTIONS
1378  SUBTARGET_OVERRIDE_OPTIONS;
1379#endif
1380
1381#ifndef SPARC_BI_ARCH
1382  /* Check for unsupported architecture size.  */
1383  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
1384    error ("%s is not supported by this configuration",
1385	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
1386#endif
1387
1388  /* We force all 64bit archs to use 128 bit long double */
1389  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1390    {
1391      error ("-mlong-double-64 not allowed with -m64");
1392      target_flags |= MASK_LONG_DOUBLE_128;
1393    }
1394
1395  /* Code model selection.  */
1396  sparc_cmodel = SPARC_DEFAULT_CMODEL;
1397
1398#ifdef SPARC_BI_ARCH
1399  if (TARGET_ARCH32)
1400    sparc_cmodel = CM_32;
1401#endif
1402
1403  if (sparc_cmodel_string != NULL)
1404    {
1405      if (TARGET_ARCH64)
1406	{
1407	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1408	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1409	      break;
1410	  if (cmodel->name == NULL)
1411	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1412	  else
1413	    sparc_cmodel = cmodel->value;
1414	}
1415      else
1416	error ("-mcmodel= is not supported on 32 bit systems");
1417    }
1418
1419  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
1420  for (i = 8; i < 16; i++)
1421    if (!call_used_regs [i])
1422      {
1423	error ("-fcall-saved-REG is not supported for out registers");
1424        call_used_regs [i] = 1;
1425      }
1426
1427  fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1428
1429  /* Set the default CPU.  */
1430  if (!global_options_set.x_sparc_cpu_and_features)
1431    {
1432      for (def = &cpu_default[0]; def->cpu != -1; ++def)
1433	if (def->cpu == TARGET_CPU_DEFAULT)
1434	  break;
1435      gcc_assert (def->cpu != -1);
1436      sparc_cpu_and_features = def->processor;
1437    }
1438
1439  if (!global_options_set.x_sparc_cpu)
1440    sparc_cpu = sparc_cpu_and_features;
1441
1442  cpu = &cpu_table[(int) sparc_cpu_and_features];
1443
1444  if (TARGET_DEBUG_OPTIONS)
1445    {
1446      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1447      fprintf (stderr, "sparc_cpu: %s\n",
1448	       cpu_table[(int) sparc_cpu].name);
1449      dump_target_flags ("cpu->disable", cpu->disable);
1450      dump_target_flags ("cpu->enable", cpu->enable);
1451    }
1452
1453  target_flags &= ~cpu->disable;
1454  target_flags |= (cpu->enable
1455#ifndef HAVE_AS_FMAF_HPC_VIS3
1456		   & ~(MASK_FMAF | MASK_VIS3)
1457#endif
1458#ifndef HAVE_AS_SPARC4
1459		   & ~MASK_CBCOND
1460#endif
1461#ifndef HAVE_AS_LEON
1462		   & ~(MASK_LEON | MASK_LEON3)
1463#endif
1464		   );
1465
1466  /* If -mfpu or -mno-fpu was explicitly used, don't override with
1467     the processor default.  */
1468  if (target_flags_explicit & MASK_FPU)
1469    target_flags = (target_flags & ~MASK_FPU) | fpu;
1470
1471  /* -mvis2 implies -mvis */
1472  if (TARGET_VIS2)
1473    target_flags |= MASK_VIS;
1474
1475  /* -mvis3 implies -mvis2 and -mvis */
1476  if (TARGET_VIS3)
1477    target_flags |= MASK_VIS2 | MASK_VIS;
1478
1479  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
1480     disabled.  */
1481  if (! TARGET_FPU)
1482    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
1483
1484  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1485     are available.
1486     -m64 also implies v9.  */
1487  if (TARGET_VIS || TARGET_ARCH64)
1488    {
1489      target_flags |= MASK_V9;
1490      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1491    }
1492
1493  /* -mvis also implies -mv8plus on 32-bit */
1494  if (TARGET_VIS && ! TARGET_ARCH64)
1495    target_flags |= MASK_V8PLUS;
1496
1497  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
1498  if (TARGET_V9 && TARGET_ARCH32)
1499    target_flags |= MASK_DEPRECATED_V8_INSNS;
1500
1501  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
1502  if (! TARGET_V9 || TARGET_ARCH64)
1503    target_flags &= ~MASK_V8PLUS;
1504
1505  /* Don't use stack biasing in 32 bit mode.  */
1506  if (TARGET_ARCH32)
1507    target_flags &= ~MASK_STACK_BIAS;
1508
1509  /* Supply a default value for align_functions.  */
1510  if (align_functions == 0
1511      && (sparc_cpu == PROCESSOR_ULTRASPARC
1512	  || sparc_cpu == PROCESSOR_ULTRASPARC3
1513	  || sparc_cpu == PROCESSOR_NIAGARA
1514	  || sparc_cpu == PROCESSOR_NIAGARA2
1515	  || sparc_cpu == PROCESSOR_NIAGARA3
1516	  || sparc_cpu == PROCESSOR_NIAGARA4))
1517    align_functions = 32;
1518
1519  /* Validate PCC_STRUCT_RETURN.  */
1520  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1521    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1522
1523  /* Only use .uaxword when compiling for a 64-bit target.  */
1524  if (!TARGET_ARCH64)
1525    targetm.asm_out.unaligned_op.di = NULL;
1526
1527  /* Do various machine dependent initializations.  */
1528  sparc_init_modes ();
1529
1530  /* Set up function hooks.  */
1531  init_machine_status = sparc_init_machine_status;
1532
1533  switch (sparc_cpu)
1534    {
1535    case PROCESSOR_V7:
1536    case PROCESSOR_CYPRESS:
1537      sparc_costs = &cypress_costs;
1538      break;
1539    case PROCESSOR_V8:
1540    case PROCESSOR_SPARCLITE:
1541    case PROCESSOR_SUPERSPARC:
1542      sparc_costs = &supersparc_costs;
1543      break;
1544    case PROCESSOR_F930:
1545    case PROCESSOR_F934:
1546    case PROCESSOR_HYPERSPARC:
1547    case PROCESSOR_SPARCLITE86X:
1548      sparc_costs = &hypersparc_costs;
1549      break;
1550    case PROCESSOR_LEON:
1551      sparc_costs = &leon_costs;
1552      break;
1553    case PROCESSOR_LEON3:
1554    case PROCESSOR_LEON3V7:
1555      sparc_costs = &leon3_costs;
1556      break;
1557    case PROCESSOR_SPARCLET:
1558    case PROCESSOR_TSC701:
1559      sparc_costs = &sparclet_costs;
1560      break;
1561    case PROCESSOR_V9:
1562    case PROCESSOR_ULTRASPARC:
1563      sparc_costs = &ultrasparc_costs;
1564      break;
1565    case PROCESSOR_ULTRASPARC3:
1566      sparc_costs = &ultrasparc3_costs;
1567      break;
1568    case PROCESSOR_NIAGARA:
1569      sparc_costs = &niagara_costs;
1570      break;
1571    case PROCESSOR_NIAGARA2:
1572      sparc_costs = &niagara2_costs;
1573      break;
1574    case PROCESSOR_NIAGARA3:
1575      sparc_costs = &niagara3_costs;
1576      break;
1577    case PROCESSOR_NIAGARA4:
1578      sparc_costs = &niagara4_costs;
1579      break;
1580    case PROCESSOR_NATIVE:
1581      gcc_unreachable ();
1582    };
1583
1584  if (sparc_memory_model == SMM_DEFAULT)
1585    {
1586      /* Choose the memory model for the operating system.  */
1587      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1588      if (os_default != SMM_DEFAULT)
1589	sparc_memory_model = os_default;
1590      /* Choose the most relaxed model for the processor.  */
1591      else if (TARGET_V9)
1592	sparc_memory_model = SMM_RMO;
1593      else if (TARGET_LEON3)
1594	sparc_memory_model = SMM_TSO;
1595      else if (TARGET_LEON)
1596	sparc_memory_model = SMM_SC;
1597      else if (TARGET_V8)
1598	sparc_memory_model = SMM_PSO;
1599      else
1600	sparc_memory_model = SMM_SC;
1601    }
1602
1603#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1604  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1605    target_flags |= MASK_LONG_DOUBLE_128;
1606#endif
1607
1608  if (TARGET_DEBUG_OPTIONS)
1609    dump_target_flags ("Final target_flags", target_flags);
1610
1611  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1612			 ((sparc_cpu == PROCESSOR_ULTRASPARC
1613			   || sparc_cpu == PROCESSOR_NIAGARA
1614			   || sparc_cpu == PROCESSOR_NIAGARA2
1615			   || sparc_cpu == PROCESSOR_NIAGARA3
1616			   || sparc_cpu == PROCESSOR_NIAGARA4)
1617			  ? 2
1618			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
1619			     ? 8 : 3)),
1620			 global_options.x_param_values,
1621			 global_options_set.x_param_values);
1622  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1623			 ((sparc_cpu == PROCESSOR_ULTRASPARC
1624			   || sparc_cpu == PROCESSOR_ULTRASPARC3
1625			   || sparc_cpu == PROCESSOR_NIAGARA
1626			   || sparc_cpu == PROCESSOR_NIAGARA2
1627			   || sparc_cpu == PROCESSOR_NIAGARA3
1628			   || sparc_cpu == PROCESSOR_NIAGARA4)
1629			  ? 64 : 32),
1630			 global_options.x_param_values,
1631			 global_options_set.x_param_values);
1632
1633  /* Disable save slot sharing for call-clobbered registers by default.
1634     The IRA sharing algorithm works on single registers only and this
1635     pessimizes for double floating-point registers.  */
1636  if (!global_options_set.x_flag_ira_share_save_slots)
1637    flag_ira_share_save_slots = 0;
1638
1639  /* We register a machine specific pass to work around errata, if any.
1640     The pass mut be scheduled as late as possible so that we have the
1641     (essentially) final form of the insn stream to work on.
1642     Registering the pass must be done at start up.  It's convenient to
1643     do it here.  */
1644  opt_pass *errata_pass = make_pass_work_around_errata (g);
1645  struct register_pass_info insert_pass_work_around_errata =
1646    {
1647      errata_pass,		/* pass */
1648      "dbr",			/* reference_pass_name */
1649      1,			/* ref_pass_instance_number */
1650      PASS_POS_INSERT_AFTER	/* po_op */
1651    };
1652  register_pass (&insert_pass_work_around_errata);
1653}
1654
1655/* Miscellaneous utilities.  */
1656
1657/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1658   or branch on register contents instructions.  */
1659
1660int
1661v9_regcmp_p (enum rtx_code code)
1662{
1663  return (code == EQ || code == NE || code == GE || code == LT
1664	  || code == LE || code == GT);
1665}
1666
1667/* Nonzero if OP is a floating point constant which can
1668   be loaded into an integer register using a single
1669   sethi instruction.  */
1670
1671int
1672fp_sethi_p (rtx op)
1673{
1674  if (GET_CODE (op) == CONST_DOUBLE)
1675    {
1676      REAL_VALUE_TYPE r;
1677      long i;
1678
1679      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1680      REAL_VALUE_TO_TARGET_SINGLE (r, i);
1681      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1682    }
1683
1684  return 0;
1685}
1686
1687/* Nonzero if OP is a floating point constant which can
1688   be loaded into an integer register using a single
1689   mov instruction.  */
1690
1691int
1692fp_mov_p (rtx op)
1693{
1694  if (GET_CODE (op) == CONST_DOUBLE)
1695    {
1696      REAL_VALUE_TYPE r;
1697      long i;
1698
1699      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1700      REAL_VALUE_TO_TARGET_SINGLE (r, i);
1701      return SPARC_SIMM13_P (i);
1702    }
1703
1704  return 0;
1705}
1706
1707/* Nonzero if OP is a floating point constant which can
1708   be loaded into an integer register using a high/losum
1709   instruction sequence.  */
1710
1711int
1712fp_high_losum_p (rtx op)
1713{
1714  /* The constraints calling this should only be in
1715     SFmode move insns, so any constant which cannot
1716     be moved using a single insn will do.  */
1717  if (GET_CODE (op) == CONST_DOUBLE)
1718    {
1719      REAL_VALUE_TYPE r;
1720      long i;
1721
1722      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1723      REAL_VALUE_TO_TARGET_SINGLE (r, i);
1724      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1725    }
1726
1727  return 0;
1728}
1729
1730/* Return true if the address of LABEL can be loaded by means of the
1731   mov{si,di}_pic_label_ref patterns in PIC mode.  */
1732
1733static bool
1734can_use_mov_pic_label_ref (rtx label)
1735{
1736  /* VxWorks does not impose a fixed gap between segments; the run-time
1737     gap can be different from the object-file gap.  We therefore can't
1738     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1739     are absolutely sure that X is in the same segment as the GOT.
1740     Unfortunately, the flexibility of linker scripts means that we
1741     can't be sure of that in general, so assume that GOT-relative
1742     accesses are never valid on VxWorks.  */
1743  if (TARGET_VXWORKS_RTP)
1744    return false;
1745
1746  /* Similarly, if the label is non-local, it might end up being placed
1747     in a different section than the current one; now mov_pic_label_ref
1748     requires the label and the code to be in the same section.  */
1749  if (LABEL_REF_NONLOCAL_P (label))
1750    return false;
1751
1752  /* Finally, if we are reordering basic blocks and partition into hot
1753     and cold sections, this might happen for any label.  */
1754  if (flag_reorder_blocks_and_partition)
1755    return false;
1756
1757  return true;
1758}
1759
1760/* Expand a move instruction.  Return true if all work is done.  */
1761
1762bool
1763sparc_expand_move (machine_mode mode, rtx *operands)
1764{
1765  /* Handle sets of MEM first.  */
1766  if (GET_CODE (operands[0]) == MEM)
1767    {
1768      /* 0 is a register (or a pair of registers) on SPARC.  */
1769      if (register_or_zero_operand (operands[1], mode))
1770	return false;
1771
1772      if (!reload_in_progress)
1773	{
1774	  operands[0] = validize_mem (operands[0]);
1775	  operands[1] = force_reg (mode, operands[1]);
1776	}
1777    }
1778
1779  /* Fixup TLS cases.  */
1780  if (TARGET_HAVE_TLS
1781      && CONSTANT_P (operands[1])
1782      && sparc_tls_referenced_p (operands [1]))
1783    {
1784      operands[1] = sparc_legitimize_tls_address (operands[1]);
1785      return false;
1786    }
1787
1788  /* Fixup PIC cases.  */
1789  if (flag_pic && CONSTANT_P (operands[1]))
1790    {
1791      if (pic_address_needs_scratch (operands[1]))
1792	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1793
1794      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
1795      if (GET_CODE (operands[1]) == LABEL_REF
1796	  && can_use_mov_pic_label_ref (operands[1]))
1797	{
1798	  if (mode == SImode)
1799	    {
1800	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1801	      return true;
1802	    }
1803
1804	  if (mode == DImode)
1805	    {
1806	      gcc_assert (TARGET_ARCH64);
1807	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1808	      return true;
1809	    }
1810	}
1811
1812      if (symbolic_operand (operands[1], mode))
1813	{
1814	  operands[1]
1815	    = sparc_legitimize_pic_address (operands[1],
1816					    reload_in_progress
1817					    ? operands[0] : NULL_RTX);
1818	  return false;
1819	}
1820    }
1821
1822  /* If we are trying to toss an integer constant into FP registers,
1823     or loading a FP or vector constant, force it into memory.  */
1824  if (CONSTANT_P (operands[1])
1825      && REG_P (operands[0])
1826      && (SPARC_FP_REG_P (REGNO (operands[0]))
1827	  || SCALAR_FLOAT_MODE_P (mode)
1828	  || VECTOR_MODE_P (mode)))
1829    {
1830      /* emit_group_store will send such bogosity to us when it is
1831         not storing directly into memory.  So fix this up to avoid
1832         crashes in output_constant_pool.  */
1833      if (operands [1] == const0_rtx)
1834	operands[1] = CONST0_RTX (mode);
1835
1836      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1837	 always other regs.  */
1838      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1839	  && (const_zero_operand (operands[1], mode)
1840	      || const_all_ones_operand (operands[1], mode)))
1841	return false;
1842
1843      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1844	  /* We are able to build any SF constant in integer registers
1845	     with at most 2 instructions.  */
1846	  && (mode == SFmode
1847	      /* And any DF constant in integer registers.  */
1848	      || (mode == DFmode
1849		  && ! can_create_pseudo_p ())))
1850	return false;
1851
1852      operands[1] = force_const_mem (mode, operands[1]);
1853      if (!reload_in_progress)
1854	operands[1] = validize_mem (operands[1]);
1855      return false;
1856    }
1857
1858  /* Accept non-constants and valid constants unmodified.  */
1859  if (!CONSTANT_P (operands[1])
1860      || GET_CODE (operands[1]) == HIGH
1861      || input_operand (operands[1], mode))
1862    return false;
1863
1864  switch (mode)
1865    {
1866    case QImode:
1867      /* All QImode constants require only one insn, so proceed.  */
1868      break;
1869
1870    case HImode:
1871    case SImode:
1872      sparc_emit_set_const32 (operands[0], operands[1]);
1873      return true;
1874
1875    case DImode:
1876      /* input_operand should have filtered out 32-bit mode.  */
1877      sparc_emit_set_const64 (operands[0], operands[1]);
1878      return true;
1879
1880    case TImode:
1881      {
1882	rtx high, low;
1883	/* TImode isn't available in 32-bit mode.  */
1884	split_double (operands[1], &high, &low);
1885	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1886			      high));
1887	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1888			      low));
1889      }
1890      return true;
1891
1892    default:
1893      gcc_unreachable ();
1894    }
1895
1896  return false;
1897}
1898
1899/* Load OP1, a 32-bit constant, into OP0, a register.
1900   We know it can't be done in one insn when we get
1901   here, the move expander guarantees this.  */
1902
1903static void
1904sparc_emit_set_const32 (rtx op0, rtx op1)
1905{
1906  machine_mode mode = GET_MODE (op0);
1907  rtx temp = op0;
1908
1909  if (can_create_pseudo_p ())
1910    temp = gen_reg_rtx (mode);
1911
1912  if (GET_CODE (op1) == CONST_INT)
1913    {
1914      gcc_assert (!small_int_operand (op1, mode)
1915		  && !const_high_operand (op1, mode));
1916
1917      /* Emit them as real moves instead of a HIGH/LO_SUM,
1918	 this way CSE can see everything and reuse intermediate
1919	 values if it wants.  */
1920      emit_insn (gen_rtx_SET (VOIDmode, temp,
1921			      GEN_INT (INTVAL (op1)
1922			        & ~(HOST_WIDE_INT)0x3ff)));
1923
1924      emit_insn (gen_rtx_SET (VOIDmode,
1925			      op0,
1926			      gen_rtx_IOR (mode, temp,
1927					   GEN_INT (INTVAL (op1) & 0x3ff))));
1928    }
1929  else
1930    {
1931      /* A symbol, emit in the traditional way.  */
1932      emit_insn (gen_rtx_SET (VOIDmode, temp,
1933			      gen_rtx_HIGH (mode, op1)));
1934      emit_insn (gen_rtx_SET (VOIDmode,
1935			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
1936    }
1937}
1938
1939/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
1940   If TEMP is nonzero, we are forbidden to use any other scratch
1941   registers.  Otherwise, we are allowed to generate them as needed.
1942
1943   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
1944   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */
1945
1946void
1947sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
1948{
1949  rtx temp1, temp2, temp3, temp4, temp5;
1950  rtx ti_temp = 0;
1951
1952  if (temp && GET_MODE (temp) == TImode)
1953    {
1954      ti_temp = temp;
1955      temp = gen_rtx_REG (DImode, REGNO (temp));
1956    }
1957
1958  /* SPARC-V9 code-model support.  */
1959  switch (sparc_cmodel)
1960    {
1961    case CM_MEDLOW:
1962      /* The range spanned by all instructions in the object is less
1963	 than 2^31 bytes (2GB) and the distance from any instruction
1964	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1965	 than 2^31 bytes (2GB).
1966
1967	 The executable must be in the low 4TB of the virtual address
1968	 space.
1969
1970	 sethi	%hi(symbol), %temp1
1971	 or	%temp1, %lo(symbol), %reg  */
1972      if (temp)
1973	temp1 = temp;  /* op0 is allowed.  */
1974      else
1975	temp1 = gen_reg_rtx (DImode);
1976
1977      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
1978      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
1979      break;
1980
1981    case CM_MEDMID:
1982      /* The range spanned by all instructions in the object is less
1983	 than 2^31 bytes (2GB) and the distance from any instruction
1984	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
1985	 than 2^31 bytes (2GB).
1986
1987	 The executable must be in the low 16TB of the virtual address
1988	 space.
1989
1990	 sethi	%h44(symbol), %temp1
1991	 or	%temp1, %m44(symbol), %temp2
1992	 sllx	%temp2, 12, %temp3
1993	 or	%temp3, %l44(symbol), %reg  */
1994      if (temp)
1995	{
1996	  temp1 = op0;
1997	  temp2 = op0;
1998	  temp3 = temp;  /* op0 is allowed.  */
1999	}
2000      else
2001	{
2002	  temp1 = gen_reg_rtx (DImode);
2003	  temp2 = gen_reg_rtx (DImode);
2004	  temp3 = gen_reg_rtx (DImode);
2005	}
2006
2007      emit_insn (gen_seth44 (temp1, op1));
2008      emit_insn (gen_setm44 (temp2, temp1, op1));
2009      emit_insn (gen_rtx_SET (VOIDmode, temp3,
2010			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2011      emit_insn (gen_setl44 (op0, temp3, op1));
2012      break;
2013
2014    case CM_MEDANY:
2015      /* The range spanned by all instructions in the object is less
2016	 than 2^31 bytes (2GB) and the distance from any instruction
2017	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2018	 than 2^31 bytes (2GB).
2019
2020	 The executable can be placed anywhere in the virtual address
2021	 space.
2022
2023	 sethi	%hh(symbol), %temp1
2024	 sethi	%lm(symbol), %temp2
2025	 or	%temp1, %hm(symbol), %temp3
2026	 sllx	%temp3, 32, %temp4
2027	 or	%temp4, %temp2, %temp5
2028	 or	%temp5, %lo(symbol), %reg  */
2029      if (temp)
2030	{
2031	  /* It is possible that one of the registers we got for operands[2]
2032	     might coincide with that of operands[0] (which is why we made
2033	     it TImode).  Pick the other one to use as our scratch.  */
2034	  if (rtx_equal_p (temp, op0))
2035	    {
2036	      gcc_assert (ti_temp);
2037	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2038	    }
2039	  temp1 = op0;
2040	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2041	  temp3 = op0;
2042	  temp4 = op0;
2043	  temp5 = op0;
2044	}
2045      else
2046	{
2047	  temp1 = gen_reg_rtx (DImode);
2048	  temp2 = gen_reg_rtx (DImode);
2049	  temp3 = gen_reg_rtx (DImode);
2050	  temp4 = gen_reg_rtx (DImode);
2051	  temp5 = gen_reg_rtx (DImode);
2052	}
2053
2054      emit_insn (gen_sethh (temp1, op1));
2055      emit_insn (gen_setlm (temp2, op1));
2056      emit_insn (gen_sethm (temp3, temp1, op1));
2057      emit_insn (gen_rtx_SET (VOIDmode, temp4,
2058			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2059      emit_insn (gen_rtx_SET (VOIDmode, temp5,
2060			      gen_rtx_PLUS (DImode, temp4, temp2)));
2061      emit_insn (gen_setlo (op0, temp5, op1));
2062      break;
2063
2064    case CM_EMBMEDANY:
2065      /* Old old old backwards compatibility kruft here.
2066	 Essentially it is MEDLOW with a fixed 64-bit
2067	 virtual base added to all data segment addresses.
2068	 Text-segment stuff is computed like MEDANY, we can't
2069	 reuse the code above because the relocation knobs
2070	 look different.
2071
2072	 Data segment:	sethi	%hi(symbol), %temp1
2073			add	%temp1, EMBMEDANY_BASE_REG, %temp2
2074			or	%temp2, %lo(symbol), %reg  */
2075      if (data_segment_operand (op1, GET_MODE (op1)))
2076	{
2077	  if (temp)
2078	    {
2079	      temp1 = temp;  /* op0 is allowed.  */
2080	      temp2 = op0;
2081	    }
2082	  else
2083	    {
2084	      temp1 = gen_reg_rtx (DImode);
2085	      temp2 = gen_reg_rtx (DImode);
2086	    }
2087
2088	  emit_insn (gen_embmedany_sethi (temp1, op1));
2089	  emit_insn (gen_embmedany_brsum (temp2, temp1));
2090	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
2091	}
2092
2093      /* Text segment:	sethi	%uhi(symbol), %temp1
2094			sethi	%hi(symbol), %temp2
2095			or	%temp1, %ulo(symbol), %temp3
2096			sllx	%temp3, 32, %temp4
2097			or	%temp4, %temp2, %temp5
2098			or	%temp5, %lo(symbol), %reg  */
2099      else
2100	{
2101	  if (temp)
2102	    {
2103	      /* It is possible that one of the registers we got for operands[2]
2104		 might coincide with that of operands[0] (which is why we made
2105		 it TImode).  Pick the other one to use as our scratch.  */
2106	      if (rtx_equal_p (temp, op0))
2107		{
2108		  gcc_assert (ti_temp);
2109		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2110		}
2111	      temp1 = op0;
2112	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
2113	      temp3 = op0;
2114	      temp4 = op0;
2115	      temp5 = op0;
2116	    }
2117	  else
2118	    {
2119	      temp1 = gen_reg_rtx (DImode);
2120	      temp2 = gen_reg_rtx (DImode);
2121	      temp3 = gen_reg_rtx (DImode);
2122	      temp4 = gen_reg_rtx (DImode);
2123	      temp5 = gen_reg_rtx (DImode);
2124	    }
2125
2126	  emit_insn (gen_embmedany_textuhi (temp1, op1));
2127	  emit_insn (gen_embmedany_texthi  (temp2, op1));
2128	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2129	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
2130				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2131	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
2132				  gen_rtx_PLUS (DImode, temp4, temp2)));
2133	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
2134	}
2135      break;
2136
2137    default:
2138      gcc_unreachable ();
2139    }
2140}
2141
2142#if HOST_BITS_PER_WIDE_INT == 32
2143static void
2144sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
2145{
2146  gcc_unreachable ();
2147}
2148#else
2149/* These avoid problems when cross compiling.  If we do not
2150   go through all this hair then the optimizer will see
2151   invalid REG_EQUAL notes or in some cases none at all.  */
2152static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2153static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2154static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2155static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2156
2157/* The optimizer is not to assume anything about exactly
2158   which bits are set for a HIGH, they are unspecified.
2159   Unfortunately this leads to many missed optimizations
2160   during CSE.  We mask out the non-HIGH bits, and matches
2161   a plain movdi, to alleviate this problem.  */
2162static rtx
2163gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2164{
2165  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2166}
2167
2168static rtx
2169gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2170{
2171  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2172}
2173
2174static rtx
2175gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2176{
2177  return gen_rtx_IOR (DImode, src, GEN_INT (val));
2178}
2179
2180static rtx
2181gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2182{
2183  return gen_rtx_XOR (DImode, src, GEN_INT (val));
2184}
2185
2186/* Worker routines for 64-bit constant formation on arch64.
2187   One of the key things to be doing in these emissions is
2188   to create as many temp REGs as possible.  This makes it
2189   possible for half-built constants to be used later when
2190   such values are similar to something required later on.
2191   Without doing this, the optimizer cannot see such
2192   opportunities.  */
2193
2194static void sparc_emit_set_const64_quick1 (rtx, rtx,
2195					   unsigned HOST_WIDE_INT, int);
2196
2197static void
2198sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2199			       unsigned HOST_WIDE_INT low_bits, int is_neg)
2200{
2201  unsigned HOST_WIDE_INT high_bits;
2202
2203  if (is_neg)
2204    high_bits = (~low_bits) & 0xffffffff;
2205  else
2206    high_bits = low_bits;
2207
2208  emit_insn (gen_safe_HIGH64 (temp, high_bits));
2209  if (!is_neg)
2210    {
2211      emit_insn (gen_rtx_SET (VOIDmode, op0,
2212			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2213    }
2214  else
2215    {
2216      /* If we are XOR'ing with -1, then we should emit a one's complement
2217	 instead.  This way the combiner will notice logical operations
2218	 such as ANDN later on and substitute.  */
2219      if ((low_bits & 0x3ff) == 0x3ff)
2220	{
2221	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2222				  gen_rtx_NOT (DImode, temp)));
2223	}
2224      else
2225	{
2226	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2227				  gen_safe_XOR64 (temp,
2228						  (-(HOST_WIDE_INT)0x400
2229						   | (low_bits & 0x3ff)))));
2230	}
2231    }
2232}
2233
2234static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2235					   unsigned HOST_WIDE_INT, int);
2236
2237static void
2238sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2239			       unsigned HOST_WIDE_INT high_bits,
2240			       unsigned HOST_WIDE_INT low_immediate,
2241			       int shift_count)
2242{
2243  rtx temp2 = op0;
2244
2245  if ((high_bits & 0xfffffc00) != 0)
2246    {
2247      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2248      if ((high_bits & ~0xfffffc00) != 0)
2249	emit_insn (gen_rtx_SET (VOIDmode, op0,
2250				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2251      else
2252	temp2 = temp;
2253    }
2254  else
2255    {
2256      emit_insn (gen_safe_SET64 (temp, high_bits));
2257      temp2 = temp;
2258    }
2259
2260  /* Now shift it up into place.  */
2261  emit_insn (gen_rtx_SET (VOIDmode, op0,
2262			  gen_rtx_ASHIFT (DImode, temp2,
2263					  GEN_INT (shift_count))));
2264
2265  /* If there is a low immediate part piece, finish up by
2266     putting that in as well.  */
2267  if (low_immediate != 0)
2268    emit_insn (gen_rtx_SET (VOIDmode, op0,
2269			    gen_safe_OR64 (op0, low_immediate)));
2270}
2271
2272static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2273					    unsigned HOST_WIDE_INT);
2274
2275/* Full 64-bit constant decomposition.  Even though this is the
2276   'worst' case, we still optimize a few things away.  */
2277static void
2278sparc_emit_set_const64_longway (rtx op0, rtx temp,
2279				unsigned HOST_WIDE_INT high_bits,
2280				unsigned HOST_WIDE_INT low_bits)
2281{
2282  rtx sub_temp = op0;
2283
2284  if (can_create_pseudo_p ())
2285    sub_temp = gen_reg_rtx (DImode);
2286
2287  if ((high_bits & 0xfffffc00) != 0)
2288    {
2289      emit_insn (gen_safe_HIGH64 (temp, high_bits));
2290      if ((high_bits & ~0xfffffc00) != 0)
2291	emit_insn (gen_rtx_SET (VOIDmode,
2292				sub_temp,
2293				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2294      else
2295	sub_temp = temp;
2296    }
2297  else
2298    {
2299      emit_insn (gen_safe_SET64 (temp, high_bits));
2300      sub_temp = temp;
2301    }
2302
2303  if (can_create_pseudo_p ())
2304    {
2305      rtx temp2 = gen_reg_rtx (DImode);
2306      rtx temp3 = gen_reg_rtx (DImode);
2307      rtx temp4 = gen_reg_rtx (DImode);
2308
2309      emit_insn (gen_rtx_SET (VOIDmode, temp4,
2310			      gen_rtx_ASHIFT (DImode, sub_temp,
2311					      GEN_INT (32))));
2312
2313      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2314      if ((low_bits & ~0xfffffc00) != 0)
2315	{
2316	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
2317				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2318	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2319				  gen_rtx_PLUS (DImode, temp4, temp3)));
2320	}
2321      else
2322	{
2323	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2324				  gen_rtx_PLUS (DImode, temp4, temp2)));
2325	}
2326    }
2327  else
2328    {
2329      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
2330      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
2331      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2332      int to_shift = 12;
2333
2334      /* We are in the middle of reload, so this is really
2335	 painful.  However we do still make an attempt to
2336	 avoid emitting truly stupid code.  */
2337      if (low1 != const0_rtx)
2338	{
2339	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2340				  gen_rtx_ASHIFT (DImode, sub_temp,
2341						  GEN_INT (to_shift))));
2342	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2343				  gen_rtx_IOR (DImode, op0, low1)));
2344	  sub_temp = op0;
2345	  to_shift = 12;
2346	}
2347      else
2348	{
2349	  to_shift += 12;
2350	}
2351      if (low2 != const0_rtx)
2352	{
2353	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2354				  gen_rtx_ASHIFT (DImode, sub_temp,
2355						  GEN_INT (to_shift))));
2356	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2357				  gen_rtx_IOR (DImode, op0, low2)));
2358	  sub_temp = op0;
2359	  to_shift = 8;
2360	}
2361      else
2362	{
2363	  to_shift += 8;
2364	}
2365      emit_insn (gen_rtx_SET (VOIDmode, op0,
2366			      gen_rtx_ASHIFT (DImode, sub_temp,
2367					      GEN_INT (to_shift))));
2368      if (low3 != const0_rtx)
2369	emit_insn (gen_rtx_SET (VOIDmode, op0,
2370				gen_rtx_IOR (DImode, op0, low3)));
2371      /* phew...  */
2372    }
2373}
2374
2375/* Analyze a 64-bit constant for certain properties.  */
2376static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2377				    unsigned HOST_WIDE_INT,
2378				    int *, int *, int *);
2379
2380static void
2381analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2382			unsigned HOST_WIDE_INT low_bits,
2383			int *hbsp, int *lbsp, int *abbasp)
2384{
2385  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2386  int i;
2387
2388  lowest_bit_set = highest_bit_set = -1;
2389  i = 0;
2390  do
2391    {
2392      if ((lowest_bit_set == -1)
2393	  && ((low_bits >> i) & 1))
2394	lowest_bit_set = i;
2395      if ((highest_bit_set == -1)
2396	  && ((high_bits >> (32 - i - 1)) & 1))
2397	highest_bit_set = (64 - i - 1);
2398    }
2399  while (++i < 32
2400	 && ((highest_bit_set == -1)
2401	     || (lowest_bit_set == -1)));
2402  if (i == 32)
2403    {
2404      i = 0;
2405      do
2406	{
2407	  if ((lowest_bit_set == -1)
2408	      && ((high_bits >> i) & 1))
2409	    lowest_bit_set = i + 32;
2410	  if ((highest_bit_set == -1)
2411	      && ((low_bits >> (32 - i - 1)) & 1))
2412	    highest_bit_set = 32 - i - 1;
2413	}
2414      while (++i < 32
2415	     && ((highest_bit_set == -1)
2416		 || (lowest_bit_set == -1)));
2417    }
2418  /* If there are no bits set this should have gone out
2419     as one instruction!  */
2420  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2421  all_bits_between_are_set = 1;
2422  for (i = lowest_bit_set; i <= highest_bit_set; i++)
2423    {
2424      if (i < 32)
2425	{
2426	  if ((low_bits & (1 << i)) != 0)
2427	    continue;
2428	}
2429      else
2430	{
2431	  if ((high_bits & (1 << (i - 32))) != 0)
2432	    continue;
2433	}
2434      all_bits_between_are_set = 0;
2435      break;
2436    }
2437  *hbsp = highest_bit_set;
2438  *lbsp = lowest_bit_set;
2439  *abbasp = all_bits_between_are_set;
2440}
2441
2442static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2443
2444static int
2445const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2446		   unsigned HOST_WIDE_INT low_bits)
2447{
2448  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2449
2450  if (high_bits == 0
2451      || high_bits == 0xffffffff)
2452    return 1;
2453
2454  analyze_64bit_constant (high_bits, low_bits,
2455			  &highest_bit_set, &lowest_bit_set,
2456			  &all_bits_between_are_set);
2457
2458  if ((highest_bit_set == 63
2459       || lowest_bit_set == 0)
2460      && all_bits_between_are_set != 0)
2461    return 1;
2462
2463  if ((highest_bit_set - lowest_bit_set) < 21)
2464    return 1;
2465
2466  return 0;
2467}
2468
2469static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2470							unsigned HOST_WIDE_INT,
2471							int, int);
2472
2473static unsigned HOST_WIDE_INT
2474create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2475			  unsigned HOST_WIDE_INT low_bits,
2476			  int lowest_bit_set, int shift)
2477{
2478  HOST_WIDE_INT hi, lo;
2479
2480  if (lowest_bit_set < 32)
2481    {
2482      lo = (low_bits >> lowest_bit_set) << shift;
2483      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2484    }
2485  else
2486    {
2487      lo = 0;
2488      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2489    }
2490  gcc_assert (! (hi & lo));
2491  return (hi | lo);
2492}
2493
2494/* Here we are sure to be arch64 and this is an integer constant
2495   being loaded into a register.  Emit the most efficient
2496   insn sequence possible.  Detection of all the 1-insn cases
2497   has been done already.  */
2498static void
2499sparc_emit_set_const64 (rtx op0, rtx op1)
2500{
2501  unsigned HOST_WIDE_INT high_bits, low_bits;
2502  int lowest_bit_set, highest_bit_set;
2503  int all_bits_between_are_set;
2504  rtx temp = 0;
2505
2506  /* Sanity check that we know what we are working with.  */
2507  gcc_assert (TARGET_ARCH64
2508	      && (GET_CODE (op0) == SUBREG
2509		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2510
2511  if (! can_create_pseudo_p ())
2512    temp = op0;
2513
2514  if (GET_CODE (op1) != CONST_INT)
2515    {
2516      sparc_emit_set_symbolic_const64 (op0, op1, temp);
2517      return;
2518    }
2519
2520  if (! temp)
2521    temp = gen_reg_rtx (DImode);
2522
2523  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2524  low_bits = (INTVAL (op1) & 0xffffffff);
2525
2526  /* low_bits	bits 0  --> 31
2527     high_bits	bits 32 --> 63  */
2528
2529  analyze_64bit_constant (high_bits, low_bits,
2530			  &highest_bit_set, &lowest_bit_set,
2531			  &all_bits_between_are_set);
2532
2533  /* First try for a 2-insn sequence.  */
2534
2535  /* These situations are preferred because the optimizer can
2536   * do more things with them:
2537   * 1) mov	-1, %reg
2538   *    sllx	%reg, shift, %reg
2539   * 2) mov	-1, %reg
2540   *    srlx	%reg, shift, %reg
2541   * 3) mov	some_small_const, %reg
2542   *    sllx	%reg, shift, %reg
2543   */
2544  if (((highest_bit_set == 63
2545	|| lowest_bit_set == 0)
2546       && all_bits_between_are_set != 0)
2547      || ((highest_bit_set - lowest_bit_set) < 12))
2548    {
2549      HOST_WIDE_INT the_const = -1;
2550      int shift = lowest_bit_set;
2551
2552      if ((highest_bit_set != 63
2553	   && lowest_bit_set != 0)
2554	  || all_bits_between_are_set == 0)
2555	{
2556	  the_const =
2557	    create_simple_focus_bits (high_bits, low_bits,
2558				      lowest_bit_set, 0);
2559	}
2560      else if (lowest_bit_set == 0)
2561	shift = -(63 - highest_bit_set);
2562
2563      gcc_assert (SPARC_SIMM13_P (the_const));
2564      gcc_assert (shift != 0);
2565
2566      emit_insn (gen_safe_SET64 (temp, the_const));
2567      if (shift > 0)
2568	emit_insn (gen_rtx_SET (VOIDmode,
2569				op0,
2570				gen_rtx_ASHIFT (DImode,
2571						temp,
2572						GEN_INT (shift))));
2573      else if (shift < 0)
2574	emit_insn (gen_rtx_SET (VOIDmode,
2575				op0,
2576				gen_rtx_LSHIFTRT (DImode,
2577						  temp,
2578						  GEN_INT (-shift))));
2579      return;
2580    }
2581
2582  /* Now a range of 22 or less bits set somewhere.
2583   * 1) sethi	%hi(focus_bits), %reg
2584   *    sllx	%reg, shift, %reg
2585   * 2) sethi	%hi(focus_bits), %reg
2586   *    srlx	%reg, shift, %reg
2587   */
2588  if ((highest_bit_set - lowest_bit_set) < 21)
2589    {
2590      unsigned HOST_WIDE_INT focus_bits =
2591	create_simple_focus_bits (high_bits, low_bits,
2592				  lowest_bit_set, 10);
2593
2594      gcc_assert (SPARC_SETHI_P (focus_bits));
2595      gcc_assert (lowest_bit_set != 10);
2596
2597      emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2598
2599      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
2600      if (lowest_bit_set < 10)
2601	emit_insn (gen_rtx_SET (VOIDmode,
2602				op0,
2603				gen_rtx_LSHIFTRT (DImode, temp,
2604						  GEN_INT (10 - lowest_bit_set))));
2605      else if (lowest_bit_set > 10)
2606	emit_insn (gen_rtx_SET (VOIDmode,
2607				op0,
2608				gen_rtx_ASHIFT (DImode, temp,
2609						GEN_INT (lowest_bit_set - 10))));
2610      return;
2611    }
2612
2613  /* 1) sethi	%hi(low_bits), %reg
2614   *    or	%reg, %lo(low_bits), %reg
2615   * 2) sethi	%hi(~low_bits), %reg
2616   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2617   */
2618  if (high_bits == 0
2619      || high_bits == 0xffffffff)
2620    {
2621      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2622				     (high_bits == 0xffffffff));
2623      return;
2624    }
2625
2626  /* Now, try 3-insn sequences.  */
2627
2628  /* 1) sethi	%hi(high_bits), %reg
2629   *    or	%reg, %lo(high_bits), %reg
2630   *    sllx	%reg, 32, %reg
2631   */
2632  if (low_bits == 0)
2633    {
2634      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2635      return;
2636    }
2637
2638  /* We may be able to do something quick
2639     when the constant is negated, so try that.  */
2640  if (const64_is_2insns ((~high_bits) & 0xffffffff,
2641			 (~low_bits) & 0xfffffc00))
2642    {
2643      /* NOTE: The trailing bits get XOR'd so we need the
2644	 non-negated bits, not the negated ones.  */
2645      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2646
2647      if ((((~high_bits) & 0xffffffff) == 0
2648	   && ((~low_bits) & 0x80000000) == 0)
2649	  || (((~high_bits) & 0xffffffff) == 0xffffffff
2650	      && ((~low_bits) & 0x80000000) != 0))
2651	{
2652	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2653
2654	  if ((SPARC_SETHI_P (fast_int)
2655	       && (~high_bits & 0xffffffff) == 0)
2656	      || SPARC_SIMM13_P (fast_int))
2657	    emit_insn (gen_safe_SET64 (temp, fast_int));
2658	  else
2659	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2660	}
2661      else
2662	{
2663	  rtx negated_const;
2664	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2665				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2666	  sparc_emit_set_const64 (temp, negated_const);
2667	}
2668
2669      /* If we are XOR'ing with -1, then we should emit a one's complement
2670	 instead.  This way the combiner will notice logical operations
2671	 such as ANDN later on and substitute.  */
2672      if (trailing_bits == 0x3ff)
2673	{
2674	  emit_insn (gen_rtx_SET (VOIDmode, op0,
2675				  gen_rtx_NOT (DImode, temp)));
2676	}
2677      else
2678	{
2679	  emit_insn (gen_rtx_SET (VOIDmode,
2680				  op0,
2681				  gen_safe_XOR64 (temp,
2682						  (-0x400 | trailing_bits))));
2683	}
2684      return;
2685    }
2686
2687  /* 1) sethi	%hi(xxx), %reg
2688   *    or	%reg, %lo(xxx), %reg
2689   *	sllx	%reg, yyy, %reg
2690   *
2691   * ??? This is just a generalized version of the low_bits==0
2692   * thing above, FIXME...
2693   */
2694  if ((highest_bit_set - lowest_bit_set) < 32)
2695    {
2696      unsigned HOST_WIDE_INT focus_bits =
2697	create_simple_focus_bits (high_bits, low_bits,
2698				  lowest_bit_set, 0);
2699
2700      /* We can't get here in this state.  */
2701      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2702
2703      /* So what we know is that the set bits straddle the
2704	 middle of the 64-bit word.  */
2705      sparc_emit_set_const64_quick2 (op0, temp,
2706				     focus_bits, 0,
2707				     lowest_bit_set);
2708      return;
2709    }
2710
2711  /* 1) sethi	%hi(high_bits), %reg
2712   *    or	%reg, %lo(high_bits), %reg
2713   *    sllx	%reg, 32, %reg
2714   *	or	%reg, low_bits, %reg
2715   */
2716  if (SPARC_SIMM13_P(low_bits)
2717      && ((int)low_bits > 0))
2718    {
2719      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2720      return;
2721    }
2722
2723  /* The easiest way when all else fails, is full decomposition.  */
2724  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2725}
2726#endif /* HOST_BITS_PER_WIDE_INT == 32 */
2727
2728/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2729   return the mode to be used for the comparison.  For floating-point,
2730   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
2731   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
2732   processing is needed.  */
2733
2734machine_mode
2735select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2736{
2737  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2738    {
2739      switch (op)
2740	{
2741	case EQ:
2742	case NE:
2743	case UNORDERED:
2744	case ORDERED:
2745	case UNLT:
2746	case UNLE:
2747	case UNGT:
2748	case UNGE:
2749	case UNEQ:
2750	case LTGT:
2751	  return CCFPmode;
2752
2753	case LT:
2754	case LE:
2755	case GT:
2756	case GE:
2757	  return CCFPEmode;
2758
2759	default:
2760	  gcc_unreachable ();
2761	}
2762    }
2763  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2764	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2765    {
2766      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2767	return CCX_NOOVmode;
2768      else
2769	return CC_NOOVmode;
2770    }
2771  else
2772    {
2773      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2774	return CCXmode;
2775      else
2776	return CCmode;
2777    }
2778}
2779
2780/* Emit the compare insn and return the CC reg for a CODE comparison
2781   with operands X and Y.  */
2782
2783static rtx
2784gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
2785{
2786  machine_mode mode;
2787  rtx cc_reg;
2788
2789  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
2790    return x;
2791
2792  mode = SELECT_CC_MODE (code, x, y);
2793
2794  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
2795     fcc regs (cse can't tell they're really call clobbered regs and will
2796     remove a duplicate comparison even if there is an intervening function
2797     call - it will then try to reload the cc reg via an int reg which is why
2798     we need the movcc patterns).  It is possible to provide the movcc
2799     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
2800     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
2801     to tell cse that CCFPE mode registers (even pseudos) are call
2802     clobbered.  */
2803
2804  /* ??? This is an experiment.  Rather than making changes to cse which may
2805     or may not be easy/clean, we do our own cse.  This is possible because
2806     we will generate hard registers.  Cse knows they're call clobbered (it
2807     doesn't know the same thing about pseudos). If we guess wrong, no big
2808     deal, but if we win, great!  */
2809
2810  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2811#if 1 /* experiment */
2812    {
2813      int reg;
2814      /* We cycle through the registers to ensure they're all exercised.  */
2815      static int next_fcc_reg = 0;
2816      /* Previous x,y for each fcc reg.  */
2817      static rtx prev_args[4][2];
2818
2819      /* Scan prev_args for x,y.  */
2820      for (reg = 0; reg < 4; reg++)
2821	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
2822	  break;
2823      if (reg == 4)
2824	{
2825	  reg = next_fcc_reg;
2826	  prev_args[reg][0] = x;
2827	  prev_args[reg][1] = y;
2828	  next_fcc_reg = (next_fcc_reg + 1) & 3;
2829	}
2830      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
2831    }
2832#else
2833    cc_reg = gen_reg_rtx (mode);
2834#endif /* ! experiment */
2835  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2836    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
2837  else
2838    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
2839
2840  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
2841     will only result in an unrecognizable insn so no point in asserting.  */
2842  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
2843
2844  return cc_reg;
2845}
2846
2847
2848/* Emit the compare insn and return the CC reg for the comparison in CMP.  */
2849
2850rtx
2851gen_compare_reg (rtx cmp)
2852{
2853  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2854}
2855
2856/* This function is used for v9 only.
2857   DEST is the target of the Scc insn.
2858   CODE is the code for an Scc's comparison.
2859   X and Y are the values we compare.
2860
2861   This function is needed to turn
2862
2863	   (set (reg:SI 110)
2864	       (gt (reg:CCX 100 %icc)
2865	           (const_int 0)))
2866   into
2867	   (set (reg:SI 110)
2868	       (gt:DI (reg:CCX 100 %icc)
2869	           (const_int 0)))
2870
2871   IE: The instruction recognizer needs to see the mode of the comparison to
2872   find the right instruction. We could use "gt:DI" right in the
2873   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
2874
2875static int
2876gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
2877{
2878  if (! TARGET_ARCH64
2879      && (GET_MODE (x) == DImode
2880	  || GET_MODE (dest) == DImode))
2881    return 0;
2882
2883  /* Try to use the movrCC insns.  */
2884  if (TARGET_ARCH64
2885      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
2886      && y == const0_rtx
2887      && v9_regcmp_p (compare_code))
2888    {
2889      rtx op0 = x;
2890      rtx temp;
2891
2892      /* Special case for op0 != 0.  This can be done with one instruction if
2893	 dest == x.  */
2894
2895      if (compare_code == NE
2896	  && GET_MODE (dest) == DImode
2897	  && rtx_equal_p (op0, dest))
2898	{
2899	  emit_insn (gen_rtx_SET (VOIDmode, dest,
2900			      gen_rtx_IF_THEN_ELSE (DImode,
2901				       gen_rtx_fmt_ee (compare_code, DImode,
2902						       op0, const0_rtx),
2903				       const1_rtx,
2904				       dest)));
2905	  return 1;
2906	}
2907
2908      if (reg_overlap_mentioned_p (dest, op0))
2909	{
2910	  /* Handle the case where dest == x.
2911	     We "early clobber" the result.  */
2912	  op0 = gen_reg_rtx (GET_MODE (x));
2913	  emit_move_insn (op0, x);
2914	}
2915
2916      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2917      if (GET_MODE (op0) != DImode)
2918	{
2919	  temp = gen_reg_rtx (DImode);
2920	  convert_move (temp, op0, 0);
2921	}
2922      else
2923	temp = op0;
2924      emit_insn (gen_rtx_SET (VOIDmode, dest,
2925			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2926				   gen_rtx_fmt_ee (compare_code, DImode,
2927						   temp, const0_rtx),
2928				   const1_rtx,
2929				   dest)));
2930      return 1;
2931    }
2932  else
2933    {
2934      x = gen_compare_reg_1 (compare_code, x, y);
2935      y = const0_rtx;
2936
2937      gcc_assert (GET_MODE (x) != CC_NOOVmode
2938		  && GET_MODE (x) != CCX_NOOVmode);
2939
2940      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
2941      emit_insn (gen_rtx_SET (VOIDmode, dest,
2942			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
2943				   gen_rtx_fmt_ee (compare_code,
2944						   GET_MODE (x), x, y),
2945				    const1_rtx, dest)));
2946      return 1;
2947    }
2948}
2949
2950
2951/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
2952   without jumps using the addx/subx instructions.  */
2953
2954bool
2955emit_scc_insn (rtx operands[])
2956{
2957  rtx tem;
2958  rtx x;
2959  rtx y;
2960  enum rtx_code code;
2961
2962  /* The quad-word fp compare library routines all return nonzero to indicate
2963     true, which is different from the equivalent libgcc routines, so we must
2964     handle them specially here.  */
2965  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
2966    {
2967      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
2968					      GET_CODE (operands[1]));
2969      operands[2] = XEXP (operands[1], 0);
2970      operands[3] = XEXP (operands[1], 1);
2971    }
2972
2973  code = GET_CODE (operands[1]);
2974  x = operands[2];
2975  y = operands[3];
2976
2977  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
2978     more applications).  The exception to this is "reg != 0" which can
2979     be done in one instruction on v9 (so we do it).  */
2980  if (code == EQ)
2981    {
2982      if (GET_MODE (x) == SImode)
2983        {
2984	  rtx pat;
2985	  if (TARGET_ARCH64)
2986	    pat = gen_seqsidi_special (operands[0], x, y);
2987	  else
2988	    pat = gen_seqsisi_special (operands[0], x, y);
2989          emit_insn (pat);
2990          return true;
2991        }
2992      else if (GET_MODE (x) == DImode)
2993        {
2994	  rtx pat = gen_seqdi_special (operands[0], x, y);
2995          emit_insn (pat);
2996          return true;
2997        }
2998    }
2999
3000  if (code == NE)
3001    {
3002      if (GET_MODE (x) == SImode)
3003        {
3004          rtx pat;
3005	  if (TARGET_ARCH64)
3006	    pat = gen_snesidi_special (operands[0], x, y);
3007	  else
3008	    pat = gen_snesisi_special (operands[0], x, y);
3009          emit_insn (pat);
3010          return true;
3011        }
3012      else if (GET_MODE (x) == DImode)
3013        {
3014	  rtx pat;
3015	  if (TARGET_VIS3)
3016	    pat = gen_snedi_special_vis3 (operands[0], x, y);
3017	  else
3018	    pat = gen_snedi_special (operands[0], x, y);
3019          emit_insn (pat);
3020          return true;
3021        }
3022    }
3023
3024  if (TARGET_V9
3025      && TARGET_ARCH64
3026      && GET_MODE (x) == DImode
3027      && !(TARGET_VIS3
3028	   && (code == GTU || code == LTU))
3029      && gen_v9_scc (operands[0], code, x, y))
3030    return true;
3031
3032  /* We can do LTU and GEU using the addx/subx instructions too.  And
3033     for GTU/LEU, if both operands are registers swap them and fall
3034     back to the easy case.  */
3035  if (code == GTU || code == LEU)
3036    {
3037      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3038          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3039        {
3040          tem = x;
3041          x = y;
3042          y = tem;
3043          code = swap_condition (code);
3044        }
3045    }
3046
3047  if (code == LTU
3048      || (!TARGET_VIS3 && code == GEU))
3049    {
3050      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3051			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3052					      gen_compare_reg_1 (code, x, y),
3053					      const0_rtx)));
3054      return true;
3055    }
3056
3057  /* All the posibilities to use addx/subx based sequences has been
3058     exhausted, try for a 3 instruction sequence using v9 conditional
3059     moves.  */
3060  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3061    return true;
3062
3063  /* Nope, do branches.  */
3064  return false;
3065}
3066
3067/* Emit a conditional jump insn for the v9 architecture using comparison code
3068   CODE and jump target LABEL.
3069   This function exists to take advantage of the v9 brxx insns.  */
3070
3071static void
3072emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3073{
3074  emit_jump_insn (gen_rtx_SET (VOIDmode,
3075			   pc_rtx,
3076			   gen_rtx_IF_THEN_ELSE (VOIDmode,
3077				    gen_rtx_fmt_ee (code, GET_MODE (op0),
3078						    op0, const0_rtx),
3079				    gen_rtx_LABEL_REF (VOIDmode, label),
3080				    pc_rtx)));
3081}
3082
3083/* Emit a conditional jump insn for the UA2011 architecture using
3084   comparison code CODE and jump target LABEL.  This function exists
3085   to take advantage of the UA2011 Compare and Branch insns.  */
3086
3087static void
3088emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3089{
3090  rtx if_then_else;
3091
3092  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3093				       gen_rtx_fmt_ee(code, GET_MODE(op0),
3094						      op0, op1),
3095				       gen_rtx_LABEL_REF (VOIDmode, label),
3096				       pc_rtx);
3097
3098  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3099}
3100
3101void
3102emit_conditional_branch_insn (rtx operands[])
3103{
3104  /* The quad-word fp compare library routines all return nonzero to indicate
3105     true, which is different from the equivalent libgcc routines, so we must
3106     handle them specially here.  */
3107  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3108    {
3109      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3110					      GET_CODE (operands[0]));
3111      operands[1] = XEXP (operands[0], 0);
3112      operands[2] = XEXP (operands[0], 1);
3113    }
3114
3115  /* If we can tell early on that the comparison is against a constant
3116     that won't fit in the 5-bit signed immediate field of a cbcond,
3117     use one of the other v9 conditional branch sequences.  */
3118  if (TARGET_CBCOND
3119      && GET_CODE (operands[1]) == REG
3120      && (GET_MODE (operands[1]) == SImode
3121	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3122      && (GET_CODE (operands[2]) != CONST_INT
3123	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
3124    {
3125      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3126      return;
3127    }
3128
3129  if (TARGET_ARCH64 && operands[2] == const0_rtx
3130      && GET_CODE (operands[1]) == REG
3131      && GET_MODE (operands[1]) == DImode)
3132    {
3133      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3134      return;
3135    }
3136
3137  operands[1] = gen_compare_reg (operands[0]);
3138  operands[2] = const0_rtx;
3139  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3140				operands[1], operands[2]);
3141  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3142				  operands[3]));
3143}
3144
3145
3146/* Generate a DFmode part of a hard TFmode register.
3147   REG is the TFmode hard register, LOW is 1 for the
3148   low 64bit of the register and 0 otherwise.
3149 */
3150rtx
3151gen_df_reg (rtx reg, int low)
3152{
3153  int regno = REGNO (reg);
3154
3155  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3156    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3157  return gen_rtx_REG (DFmode, regno);
3158}
3159
3160/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
3161   Unlike normal calls, TFmode operands are passed by reference.  It is
3162   assumed that no more than 3 operands are required.  */
3163
3164static void
3165emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3166{
3167  rtx ret_slot = NULL, arg[3], func_sym;
3168  int i;
3169
3170  /* We only expect to be called for conversions, unary, and binary ops.  */
3171  gcc_assert (nargs == 2 || nargs == 3);
3172
3173  for (i = 0; i < nargs; ++i)
3174    {
3175      rtx this_arg = operands[i];
3176      rtx this_slot;
3177
3178      /* TFmode arguments and return values are passed by reference.  */
3179      if (GET_MODE (this_arg) == TFmode)
3180	{
3181	  int force_stack_temp;
3182
3183	  force_stack_temp = 0;
3184	  if (TARGET_BUGGY_QP_LIB && i == 0)
3185	    force_stack_temp = 1;
3186
3187	  if (GET_CODE (this_arg) == MEM
3188	      && ! force_stack_temp)
3189	    {
3190	      tree expr = MEM_EXPR (this_arg);
3191	      if (expr)
3192		mark_addressable (expr);
3193	      this_arg = XEXP (this_arg, 0);
3194	    }
3195	  else if (CONSTANT_P (this_arg)
3196		   && ! force_stack_temp)
3197	    {
3198	      this_slot = force_const_mem (TFmode, this_arg);
3199	      this_arg = XEXP (this_slot, 0);
3200	    }
3201	  else
3202	    {
3203	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3204
3205	      /* Operand 0 is the return value.  We'll copy it out later.  */
3206	      if (i > 0)
3207		emit_move_insn (this_slot, this_arg);
3208	      else
3209		ret_slot = this_slot;
3210
3211	      this_arg = XEXP (this_slot, 0);
3212	    }
3213	}
3214
3215      arg[i] = this_arg;
3216    }
3217
3218  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3219
3220  if (GET_MODE (operands[0]) == TFmode)
3221    {
3222      if (nargs == 2)
3223	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
3224			   arg[0], GET_MODE (arg[0]),
3225			   arg[1], GET_MODE (arg[1]));
3226      else
3227	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
3228			   arg[0], GET_MODE (arg[0]),
3229			   arg[1], GET_MODE (arg[1]),
3230			   arg[2], GET_MODE (arg[2]));
3231
3232      if (ret_slot)
3233	emit_move_insn (operands[0], ret_slot);
3234    }
3235  else
3236    {
3237      rtx ret;
3238
3239      gcc_assert (nargs == 2);
3240
3241      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3242				     GET_MODE (operands[0]), 1,
3243				     arg[1], GET_MODE (arg[1]));
3244
3245      if (ret != operands[0])
3246	emit_move_insn (operands[0], ret);
3247    }
3248}
3249
3250/* Expand soft-float TFmode calls to sparc abi routines.  */
3251
3252static void
3253emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3254{
3255  const char *func;
3256
3257  switch (code)
3258    {
3259    case PLUS:
3260      func = "_Qp_add";
3261      break;
3262    case MINUS:
3263      func = "_Qp_sub";
3264      break;
3265    case MULT:
3266      func = "_Qp_mul";
3267      break;
3268    case DIV:
3269      func = "_Qp_div";
3270      break;
3271    default:
3272      gcc_unreachable ();
3273    }
3274
3275  emit_soft_tfmode_libcall (func, 3, operands);
3276}
3277
3278static void
3279emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3280{
3281  const char *func;
3282
3283  gcc_assert (code == SQRT);
3284  func = "_Qp_sqrt";
3285
3286  emit_soft_tfmode_libcall (func, 2, operands);
3287}
3288
3289static void
3290emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3291{
3292  const char *func;
3293
3294  switch (code)
3295    {
3296    case FLOAT_EXTEND:
3297      switch (GET_MODE (operands[1]))
3298	{
3299	case SFmode:
3300	  func = "_Qp_stoq";
3301	  break;
3302	case DFmode:
3303	  func = "_Qp_dtoq";
3304	  break;
3305	default:
3306	  gcc_unreachable ();
3307	}
3308      break;
3309
3310    case FLOAT_TRUNCATE:
3311      switch (GET_MODE (operands[0]))
3312	{
3313	case SFmode:
3314	  func = "_Qp_qtos";
3315	  break;
3316	case DFmode:
3317	  func = "_Qp_qtod";
3318	  break;
3319	default:
3320	  gcc_unreachable ();
3321	}
3322      break;
3323
3324    case FLOAT:
3325      switch (GET_MODE (operands[1]))
3326	{
3327	case SImode:
3328	  func = "_Qp_itoq";
3329	  if (TARGET_ARCH64)
3330	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3331	  break;
3332	case DImode:
3333	  func = "_Qp_xtoq";
3334	  break;
3335	default:
3336	  gcc_unreachable ();
3337	}
3338      break;
3339
3340    case UNSIGNED_FLOAT:
3341      switch (GET_MODE (operands[1]))
3342	{
3343	case SImode:
3344	  func = "_Qp_uitoq";
3345	  if (TARGET_ARCH64)
3346	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3347	  break;
3348	case DImode:
3349	  func = "_Qp_uxtoq";
3350	  break;
3351	default:
3352	  gcc_unreachable ();
3353	}
3354      break;
3355
3356    case FIX:
3357      switch (GET_MODE (operands[0]))
3358	{
3359	case SImode:
3360	  func = "_Qp_qtoi";
3361	  break;
3362	case DImode:
3363	  func = "_Qp_qtox";
3364	  break;
3365	default:
3366	  gcc_unreachable ();
3367	}
3368      break;
3369
3370    case UNSIGNED_FIX:
3371      switch (GET_MODE (operands[0]))
3372	{
3373	case SImode:
3374	  func = "_Qp_qtoui";
3375	  break;
3376	case DImode:
3377	  func = "_Qp_qtoux";
3378	  break;
3379	default:
3380	  gcc_unreachable ();
3381	}
3382      break;
3383
3384    default:
3385      gcc_unreachable ();
3386    }
3387
3388  emit_soft_tfmode_libcall (func, 2, operands);
3389}
3390
3391/* Expand a hard-float tfmode operation.  All arguments must be in
3392   registers.  */
3393
3394static void
3395emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3396{
3397  rtx op, dest;
3398
3399  if (GET_RTX_CLASS (code) == RTX_UNARY)
3400    {
3401      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3402      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3403    }
3404  else
3405    {
3406      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3407      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3408      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3409			   operands[1], operands[2]);
3410    }
3411
3412  if (register_operand (operands[0], VOIDmode))
3413    dest = operands[0];
3414  else
3415    dest = gen_reg_rtx (GET_MODE (operands[0]));
3416
3417  emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3418
3419  if (dest != operands[0])
3420    emit_move_insn (operands[0], dest);
3421}
3422
3423void
3424emit_tfmode_binop (enum rtx_code code, rtx *operands)
3425{
3426  if (TARGET_HARD_QUAD)
3427    emit_hard_tfmode_operation (code, operands);
3428  else
3429    emit_soft_tfmode_binop (code, operands);
3430}
3431
3432void
3433emit_tfmode_unop (enum rtx_code code, rtx *operands)
3434{
3435  if (TARGET_HARD_QUAD)
3436    emit_hard_tfmode_operation (code, operands);
3437  else
3438    emit_soft_tfmode_unop (code, operands);
3439}
3440
3441void
3442emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3443{
3444  if (TARGET_HARD_QUAD)
3445    emit_hard_tfmode_operation (code, operands);
3446  else
3447    emit_soft_tfmode_cvt (code, operands);
3448}
3449
3450/* Return nonzero if a branch/jump/call instruction will be emitting
3451   nop into its delay slot.  */
3452
3453int
3454empty_delay_slot (rtx_insn *insn)
3455{
3456  rtx seq;
3457
3458  /* If no previous instruction (should not happen), return true.  */
3459  if (PREV_INSN (insn) == NULL)
3460    return 1;
3461
3462  seq = NEXT_INSN (PREV_INSN (insn));
3463  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3464    return 0;
3465
3466  return 1;
3467}
3468
3469/* Return nonzero if we should emit a nop after a cbcond instruction.
3470   The cbcond instruction does not have a delay slot, however there is
3471   a severe performance penalty if a control transfer appears right
3472   after a cbcond.  Therefore we emit a nop when we detect this
3473   situation.  */
3474
3475int
3476emit_cbcond_nop (rtx insn)
3477{
3478  rtx next = next_active_insn (insn);
3479
3480  if (!next)
3481    return 1;
3482
3483  if (NONJUMP_INSN_P (next)
3484      && GET_CODE (PATTERN (next)) == SEQUENCE)
3485    next = XVECEXP (PATTERN (next), 0, 0);
3486  else if (CALL_P (next)
3487	   && GET_CODE (PATTERN (next)) == PARALLEL)
3488    {
3489      rtx delay = XVECEXP (PATTERN (next), 0, 1);
3490
3491      if (GET_CODE (delay) == RETURN)
3492	{
3493	  /* It's a sibling call.  Do not emit the nop if we're going
3494	     to emit something other than the jump itself as the first
3495	     instruction of the sibcall sequence.  */
3496	  if (sparc_leaf_function_p || TARGET_FLAT)
3497	    return 0;
3498	}
3499    }
3500
3501  if (NONJUMP_INSN_P (next))
3502    return 0;
3503
3504  return 1;
3505}
3506
3507/* Return nonzero if TRIAL can go into the call delay slot.  */
3508
3509int
3510eligible_for_call_delay (rtx_insn *trial)
3511{
3512  rtx pat;
3513
3514  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3515    return 0;
3516
3517  /* Binutils allows
3518       call __tls_get_addr, %tgd_call (foo)
3519        add %l7, %o0, %o0, %tgd_add (foo)
3520     while Sun as/ld does not.  */
3521  if (TARGET_GNU_TLS || !TARGET_TLS)
3522    return 1;
3523
3524  pat = PATTERN (trial);
3525
3526  /* We must reject tgd_add{32|64}, i.e.
3527       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3528     and tldm_add{32|64}, i.e.
3529       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3530     for Sun as/ld.  */
3531  if (GET_CODE (pat) == SET
3532      && GET_CODE (SET_SRC (pat)) == PLUS)
3533    {
3534      rtx unspec = XEXP (SET_SRC (pat), 1);
3535
3536      if (GET_CODE (unspec) == UNSPEC
3537	  && (XINT (unspec, 1) == UNSPEC_TLSGD
3538	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
3539	return 0;
3540    }
3541
3542  return 1;
3543}
3544
3545/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3546   instruction.  RETURN_P is true if the v9 variant 'return' is to be
3547   considered in the test too.
3548
3549   TRIAL must be a SET whose destination is a REG appropriate for the
3550   'restore' instruction or, if RETURN_P is true, for the 'return'
3551   instruction.  */
3552
3553static int
3554eligible_for_restore_insn (rtx trial, bool return_p)
3555{
3556  rtx pat = PATTERN (trial);
3557  rtx src = SET_SRC (pat);
3558  bool src_is_freg = false;
3559  rtx src_reg;
3560
3561  /* Since we now can do moves between float and integer registers when
3562     VIS3 is enabled, we have to catch this case.  We can allow such
3563     moves when doing a 'return' however.  */
3564  src_reg = src;
3565  if (GET_CODE (src_reg) == SUBREG)
3566    src_reg = SUBREG_REG (src_reg);
3567  if (GET_CODE (src_reg) == REG
3568      && SPARC_FP_REG_P (REGNO (src_reg)))
3569    src_is_freg = true;
3570
3571  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
3572  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3573      && arith_operand (src, GET_MODE (src))
3574      && ! src_is_freg)
3575    {
3576      if (TARGET_ARCH64)
3577        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3578      else
3579        return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3580    }
3581
3582  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
3583  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3584	   && arith_double_operand (src, GET_MODE (src))
3585	   && ! src_is_freg)
3586    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3587
3588  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
3589  else if (! TARGET_FPU && register_operand (src, SFmode))
3590    return 1;
3591
3592  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
3593  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3594    return 1;
3595
3596  /* If we have the 'return' instruction, anything that does not use
3597     local or output registers and can go into a delay slot wins.  */
3598  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3599    return 1;
3600
3601  /* The 'restore src1,src2,dest' pattern for SImode.  */
3602  else if (GET_CODE (src) == PLUS
3603	   && register_operand (XEXP (src, 0), SImode)
3604	   && arith_operand (XEXP (src, 1), SImode))
3605    return 1;
3606
3607  /* The 'restore src1,src2,dest' pattern for DImode.  */
3608  else if (GET_CODE (src) == PLUS
3609	   && register_operand (XEXP (src, 0), DImode)
3610	   && arith_double_operand (XEXP (src, 1), DImode))
3611    return 1;
3612
3613  /* The 'restore src1,%lo(src2),dest' pattern.  */
3614  else if (GET_CODE (src) == LO_SUM
3615	   && ! TARGET_CM_MEDMID
3616	   && ((register_operand (XEXP (src, 0), SImode)
3617	        && immediate_operand (XEXP (src, 1), SImode))
3618	       || (TARGET_ARCH64
3619		   && register_operand (XEXP (src, 0), DImode)
3620		   && immediate_operand (XEXP (src, 1), DImode))))
3621    return 1;
3622
3623  /* The 'restore src,src,dest' pattern.  */
3624  else if (GET_CODE (src) == ASHIFT
3625	   && (register_operand (XEXP (src, 0), SImode)
3626	       || register_operand (XEXP (src, 0), DImode))
3627	   && XEXP (src, 1) == const1_rtx)
3628    return 1;
3629
3630  return 0;
3631}
3632
3633/* Return nonzero if TRIAL can go into the function return's delay slot.  */
3634
3635int
3636eligible_for_return_delay (rtx_insn *trial)
3637{
3638  int regno;
3639  rtx pat;
3640
3641  /* If the function uses __builtin_eh_return, the eh_return machinery
3642     occupies the delay slot.  */
3643  if (crtl->calls_eh_return)
3644    return 0;
3645
3646  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3647    return 0;
3648
3649  /* In the case of a leaf or flat function, anything can go into the slot.  */
3650  if (sparc_leaf_function_p || TARGET_FLAT)
3651    return 1;
3652
3653  if (!NONJUMP_INSN_P (trial))
3654    return 0;
3655
3656  pat = PATTERN (trial);
3657  if (GET_CODE (pat) == PARALLEL)
3658    {
3659      int i;
3660
3661      if (! TARGET_V9)
3662	return 0;
3663      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3664	{
3665	  rtx expr = XVECEXP (pat, 0, i);
3666	  if (GET_CODE (expr) != SET)
3667	    return 0;
3668	  if (GET_CODE (SET_DEST (expr)) != REG)
3669	    return 0;
3670	  regno = REGNO (SET_DEST (expr));
3671	  if (regno >= 8 && regno < 24)
3672	    return 0;
3673	}
3674      return !epilogue_renumber (&pat, 1);
3675    }
3676
3677  if (GET_CODE (pat) != SET)
3678    return 0;
3679
3680  if (GET_CODE (SET_DEST (pat)) != REG)
3681    return 0;
3682
3683  regno = REGNO (SET_DEST (pat));
3684
3685  /* Otherwise, only operations which can be done in tandem with
3686     a `restore' or `return' insn can go into the delay slot.  */
3687  if (regno >= 8 && regno < 24)
3688    return 0;
3689
3690  /* If this instruction sets up floating point register and we have a return
3691     instruction, it can probably go in.  But restore will not work
3692     with FP_REGS.  */
3693  if (! SPARC_INT_REG_P (regno))
3694    return TARGET_V9 && !epilogue_renumber (&pat, 1);
3695
3696  return eligible_for_restore_insn (trial, true);
3697}
3698
3699/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */
3700
3701int
3702eligible_for_sibcall_delay (rtx_insn *trial)
3703{
3704  rtx pat;
3705
3706  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3707    return 0;
3708
3709  if (!NONJUMP_INSN_P (trial))
3710    return 0;
3711
3712  pat = PATTERN (trial);
3713
3714  if (sparc_leaf_function_p || TARGET_FLAT)
3715    {
3716      /* If the tail call is done using the call instruction,
3717	 we have to restore %o7 in the delay slot.  */
3718      if (LEAF_SIBCALL_SLOT_RESERVED_P)
3719	return 0;
3720
3721      /* %g1 is used to build the function address */
3722      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3723	return 0;
3724
3725      return 1;
3726    }
3727
3728  if (GET_CODE (pat) != SET)
3729    return 0;
3730
3731  /* Otherwise, only operations which can be done in tandem with
3732     a `restore' insn can go into the delay slot.  */
3733  if (GET_CODE (SET_DEST (pat)) != REG
3734      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3735      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3736    return 0;
3737
3738  /* If it mentions %o7, it can't go in, because sibcall will clobber it
3739     in most cases.  */
3740  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3741    return 0;
3742
3743  return eligible_for_restore_insn (trial, false);
3744}
3745
3746/* Determine if it's legal to put X into the constant pool.  This
3747   is not possible if X contains the address of a symbol that is
3748   not constant (TLS) or not known at final link time (PIC).  */
3749
3750static bool
3751sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3752{
3753  switch (GET_CODE (x))
3754    {
3755    case CONST_INT:
3756    case CONST_DOUBLE:
3757    case CONST_VECTOR:
3758      /* Accept all non-symbolic constants.  */
3759      return false;
3760
3761    case LABEL_REF:
3762      /* Labels are OK iff we are non-PIC.  */
3763      return flag_pic != 0;
3764
3765    case SYMBOL_REF:
3766      /* 'Naked' TLS symbol references are never OK,
3767	 non-TLS symbols are OK iff we are non-PIC.  */
3768      if (SYMBOL_REF_TLS_MODEL (x))
3769	return true;
3770      else
3771	return flag_pic != 0;
3772
3773    case CONST:
3774      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3775    case PLUS:
3776    case MINUS:
3777      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3778         || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3779    case UNSPEC:
3780      return true;
3781    default:
3782      gcc_unreachable ();
3783    }
3784}
3785
3786/* Global Offset Table support.  */
3787static GTY(()) rtx got_helper_rtx = NULL_RTX;
3788static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3789
3790/* Return the SYMBOL_REF for the Global Offset Table.  */
3791
3792static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3793
3794static rtx
3795sparc_got (void)
3796{
3797  if (!sparc_got_symbol)
3798    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3799
3800  return sparc_got_symbol;
3801}
3802
3803/* Ensure that we are not using patterns that are not OK with PIC.  */
3804
3805int
3806check_pic (int i)
3807{
3808  rtx op;
3809
3810  switch (flag_pic)
3811    {
3812    case 1:
3813      op = recog_data.operand[i];
3814      gcc_assert (GET_CODE (op) != SYMBOL_REF
3815	  	  && (GET_CODE (op) != CONST
3816		      || (GET_CODE (XEXP (op, 0)) == MINUS
3817			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
3818			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3819    case 2:
3820    default:
3821      return 1;
3822    }
3823}
3824
3825/* Return true if X is an address which needs a temporary register when
3826   reloaded while generating PIC code.  */
3827
3828int
3829pic_address_needs_scratch (rtx x)
3830{
3831  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
3832  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3833      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3834      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3835      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3836    return 1;
3837
3838  return 0;
3839}
3840
3841/* Determine if a given RTX is a valid constant.  We already know this
3842   satisfies CONSTANT_P.  */
3843
3844static bool
3845sparc_legitimate_constant_p (machine_mode mode, rtx x)
3846{
3847  switch (GET_CODE (x))
3848    {
3849    case CONST:
3850    case SYMBOL_REF:
3851      if (sparc_tls_referenced_p (x))
3852	return false;
3853      break;
3854
3855    case CONST_DOUBLE:
3856      if (GET_MODE (x) == VOIDmode)
3857        return true;
3858
3859      /* Floating point constants are generally not ok.
3860	 The only exception is 0.0 and all-ones in VIS.  */
3861      if (TARGET_VIS
3862	  && SCALAR_FLOAT_MODE_P (mode)
3863	  && (const_zero_operand (x, mode)
3864	      || const_all_ones_operand (x, mode)))
3865	return true;
3866
3867      return false;
3868
3869    case CONST_VECTOR:
3870      /* Vector constants are generally not ok.
3871	 The only exception is 0 or -1 in VIS.  */
3872      if (TARGET_VIS
3873	  && (const_zero_operand (x, mode)
3874	      || const_all_ones_operand (x, mode)))
3875	return true;
3876
3877      return false;
3878
3879    default:
3880      break;
3881    }
3882
3883  return true;
3884}
3885
3886/* Determine if a given RTX is a valid constant address.  */
3887
3888bool
3889constant_address_p (rtx x)
3890{
3891  switch (GET_CODE (x))
3892    {
3893    case LABEL_REF:
3894    case CONST_INT:
3895    case HIGH:
3896      return true;
3897
3898    case CONST:
3899      if (flag_pic && pic_address_needs_scratch (x))
3900	return false;
3901      return sparc_legitimate_constant_p (Pmode, x);
3902
3903    case SYMBOL_REF:
3904      return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3905
3906    default:
3907      return false;
3908    }
3909}
3910
3911/* Nonzero if the constant value X is a legitimate general operand
3912   when generating PIC code.  It is given that flag_pic is on and
3913   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
3914
3915bool
3916legitimate_pic_operand_p (rtx x)
3917{
3918  if (pic_address_needs_scratch (x))
3919    return false;
3920  if (sparc_tls_referenced_p (x))
3921    return false;
3922  return true;
3923}
3924
3925#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
3926  (CONST_INT_P (X)					\
3927   && INTVAL (X) >= -0x1000				\
3928   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3929
3930#define RTX_OK_FOR_OLO10_P(X, MODE)			\
3931  (CONST_INT_P (X)					\
3932   && INTVAL (X) >= -0x1000				\
3933   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
3934
3935/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3936
3937   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3938   ordinarily.  This changes a bit when generating PIC.  */
3939
3940static bool
3941sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3942{
3943  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3944
3945  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3946    rs1 = addr;
3947  else if (GET_CODE (addr) == PLUS)
3948    {
3949      rs1 = XEXP (addr, 0);
3950      rs2 = XEXP (addr, 1);
3951
3952      /* Canonicalize.  REG comes first, if there are no regs,
3953	 LO_SUM comes first.  */
3954      if (!REG_P (rs1)
3955	  && GET_CODE (rs1) != SUBREG
3956	  && (REG_P (rs2)
3957	      || GET_CODE (rs2) == SUBREG
3958	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3959	{
3960	  rs1 = XEXP (addr, 1);
3961	  rs2 = XEXP (addr, 0);
3962	}
3963
3964      if ((flag_pic == 1
3965	   && rs1 == pic_offset_table_rtx
3966	   && !REG_P (rs2)
3967	   && GET_CODE (rs2) != SUBREG
3968	   && GET_CODE (rs2) != LO_SUM
3969	   && GET_CODE (rs2) != MEM
3970	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3971	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3972	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3973	  || ((REG_P (rs1)
3974	       || GET_CODE (rs1) == SUBREG)
3975	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
3976	{
3977	  imm1 = rs2;
3978	  rs2 = NULL;
3979	}
3980      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
3981	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
3982	{
3983	  /* We prohibit REG + REG for TFmode when there are no quad move insns
3984	     and we consequently need to split.  We do this because REG+REG
3985	     is not an offsettable address.  If we get the situation in reload
3986	     where source and destination of a movtf pattern are both MEMs with
3987	     REG+REG address, then only one of them gets converted to an
3988	     offsettable address.  */
3989	  if (mode == TFmode
3990	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
3991	    return 0;
3992
3993	  /* Likewise for TImode, but in all cases.  */
3994	  if (mode == TImode)
3995	    return 0;
3996
3997	  /* We prohibit REG + REG on ARCH32 if not optimizing for
3998	     DFmode/DImode because then mem_min_alignment is likely to be zero
3999	     after reload and the  forced split would lack a matching splitter
4000	     pattern.  */
4001	  if (TARGET_ARCH32 && !optimize
4002	      && (mode == DFmode || mode == DImode))
4003	    return 0;
4004	}
4005      else if (USE_AS_OFFSETABLE_LO10
4006	       && GET_CODE (rs1) == LO_SUM
4007	       && TARGET_ARCH64
4008	       && ! TARGET_CM_MEDMID
4009	       && RTX_OK_FOR_OLO10_P (rs2, mode))
4010	{
4011	  rs2 = NULL;
4012	  imm1 = XEXP (rs1, 1);
4013	  rs1 = XEXP (rs1, 0);
4014	  if (!CONSTANT_P (imm1)
4015	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4016	    return 0;
4017	}
4018    }
4019  else if (GET_CODE (addr) == LO_SUM)
4020    {
4021      rs1 = XEXP (addr, 0);
4022      imm1 = XEXP (addr, 1);
4023
4024      if (!CONSTANT_P (imm1)
4025	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4026	return 0;
4027
4028      /* We can't allow TFmode in 32-bit mode, because an offset greater
4029	 than the alignment (8) may cause the LO_SUM to overflow.  */
4030      if (mode == TFmode && TARGET_ARCH32)
4031	return 0;
4032    }
4033  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4034    return 1;
4035  else
4036    return 0;
4037
4038  if (GET_CODE (rs1) == SUBREG)
4039    rs1 = SUBREG_REG (rs1);
4040  if (!REG_P (rs1))
4041    return 0;
4042
4043  if (rs2)
4044    {
4045      if (GET_CODE (rs2) == SUBREG)
4046	rs2 = SUBREG_REG (rs2);
4047      if (!REG_P (rs2))
4048	return 0;
4049    }
4050
4051  if (strict)
4052    {
4053      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4054	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4055	return 0;
4056    }
4057  else
4058    {
4059      if ((! SPARC_INT_REG_P (REGNO (rs1))
4060	   && REGNO (rs1) != FRAME_POINTER_REGNUM
4061	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4062	  || (rs2
4063	      && (! SPARC_INT_REG_P (REGNO (rs2))
4064		  && REGNO (rs2) != FRAME_POINTER_REGNUM
4065		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4066	return 0;
4067    }
4068  return 1;
4069}
4070
4071/* Return the SYMBOL_REF for the tls_get_addr function.  */
4072
4073static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4074
4075static rtx
4076sparc_tls_get_addr (void)
4077{
4078  if (!sparc_tls_symbol)
4079    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4080
4081  return sparc_tls_symbol;
4082}
4083
4084/* Return the Global Offset Table to be used in TLS mode.  */
4085
4086static rtx
4087sparc_tls_got (void)
4088{
4089  /* In PIC mode, this is just the PIC offset table.  */
4090  if (flag_pic)
4091    {
4092      crtl->uses_pic_offset_table = 1;
4093      return pic_offset_table_rtx;
4094    }
4095
4096  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4097     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
4098  if (TARGET_SUN_TLS && TARGET_ARCH32)
4099    {
4100      load_got_register ();
4101      return global_offset_table_rtx;
4102    }
4103
4104  /* In all other cases, we load a new pseudo with the GOT symbol.  */
4105  return copy_to_reg (sparc_got ());
4106}
4107
4108/* Return true if X contains a thread-local symbol.  */
4109
4110static bool
4111sparc_tls_referenced_p (rtx x)
4112{
4113  if (!TARGET_HAVE_TLS)
4114    return false;
4115
4116  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4117    x = XEXP (XEXP (x, 0), 0);
4118
4119  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4120    return true;
4121
4122  /* That's all we handle in sparc_legitimize_tls_address for now.  */
4123  return false;
4124}
4125
4126/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4127   this (thread-local) address.  */
4128
4129static rtx
4130sparc_legitimize_tls_address (rtx addr)
4131{
4132  rtx temp1, temp2, temp3, ret, o0, got;
4133  rtx_insn *insn;
4134
4135  gcc_assert (can_create_pseudo_p ());
4136
4137  if (GET_CODE (addr) == SYMBOL_REF)
4138    switch (SYMBOL_REF_TLS_MODEL (addr))
4139      {
4140      case TLS_MODEL_GLOBAL_DYNAMIC:
4141	start_sequence ();
4142	temp1 = gen_reg_rtx (SImode);
4143	temp2 = gen_reg_rtx (SImode);
4144	ret = gen_reg_rtx (Pmode);
4145	o0 = gen_rtx_REG (Pmode, 8);
4146	got = sparc_tls_got ();
4147	emit_insn (gen_tgd_hi22 (temp1, addr));
4148	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4149	if (TARGET_ARCH32)
4150	  {
4151	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4152	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4153						   addr, const1_rtx));
4154	  }
4155	else
4156	  {
4157	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4158	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4159						   addr, const1_rtx));
4160	  }
4161	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4162	insn = get_insns ();
4163	end_sequence ();
4164	emit_libcall_block (insn, ret, o0, addr);
4165	break;
4166
4167      case TLS_MODEL_LOCAL_DYNAMIC:
4168	start_sequence ();
4169	temp1 = gen_reg_rtx (SImode);
4170	temp2 = gen_reg_rtx (SImode);
4171	temp3 = gen_reg_rtx (Pmode);
4172	ret = gen_reg_rtx (Pmode);
4173	o0 = gen_rtx_REG (Pmode, 8);
4174	got = sparc_tls_got ();
4175	emit_insn (gen_tldm_hi22 (temp1));
4176	emit_insn (gen_tldm_lo10 (temp2, temp1));
4177	if (TARGET_ARCH32)
4178	  {
4179	    emit_insn (gen_tldm_add32 (o0, got, temp2));
4180	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4181						    const1_rtx));
4182	  }
4183	else
4184	  {
4185	    emit_insn (gen_tldm_add64 (o0, got, temp2));
4186	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4187						    const1_rtx));
4188	  }
4189	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4190	insn = get_insns ();
4191	end_sequence ();
4192	emit_libcall_block (insn, temp3, o0,
4193			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4194					    UNSPEC_TLSLD_BASE));
4195	temp1 = gen_reg_rtx (SImode);
4196	temp2 = gen_reg_rtx (SImode);
4197	emit_insn (gen_tldo_hix22 (temp1, addr));
4198	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4199	if (TARGET_ARCH32)
4200	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4201	else
4202	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4203	break;
4204
4205      case TLS_MODEL_INITIAL_EXEC:
4206	temp1 = gen_reg_rtx (SImode);
4207	temp2 = gen_reg_rtx (SImode);
4208	temp3 = gen_reg_rtx (Pmode);
4209	got = sparc_tls_got ();
4210	emit_insn (gen_tie_hi22 (temp1, addr));
4211	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4212	if (TARGET_ARCH32)
4213	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4214	else
4215	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4216        if (TARGET_SUN_TLS)
4217	  {
4218	    ret = gen_reg_rtx (Pmode);
4219	    if (TARGET_ARCH32)
4220	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4221					temp3, addr));
4222	    else
4223	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4224					temp3, addr));
4225	  }
4226	else
4227	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4228	break;
4229
4230      case TLS_MODEL_LOCAL_EXEC:
4231	temp1 = gen_reg_rtx (Pmode);
4232	temp2 = gen_reg_rtx (Pmode);
4233	if (TARGET_ARCH32)
4234	  {
4235	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4236	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4237	  }
4238	else
4239	  {
4240	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4241	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4242	  }
4243	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4244	break;
4245
4246      default:
4247	gcc_unreachable ();
4248      }
4249
4250  else if (GET_CODE (addr) == CONST)
4251    {
4252      rtx base, offset;
4253
4254      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4255
4256      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4257      offset = XEXP (XEXP (addr, 0), 1);
4258
4259      base = force_operand (base, NULL_RTX);
4260      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4261	offset = force_reg (Pmode, offset);
4262      ret = gen_rtx_PLUS (Pmode, base, offset);
4263    }
4264
4265  else
4266    gcc_unreachable ();  /* for now ... */
4267
4268  return ret;
4269}
4270
4271/* Legitimize PIC addresses.  If the address is already position-independent,
4272   we return ORIG.  Newly generated position-independent addresses go into a
4273   reg.  This is REG if nonzero, otherwise we allocate register(s) as
4274   necessary.  */
4275
4276static rtx
4277sparc_legitimize_pic_address (rtx orig, rtx reg)
4278{
4279  bool gotdata_op = false;
4280
4281  if (GET_CODE (orig) == SYMBOL_REF
4282      /* See the comment in sparc_expand_move.  */
4283      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4284    {
4285      rtx pic_ref, address;
4286      rtx_insn *insn;
4287
4288      if (reg == 0)
4289	{
4290	  gcc_assert (can_create_pseudo_p ());
4291	  reg = gen_reg_rtx (Pmode);
4292	}
4293
4294      if (flag_pic == 2)
4295	{
4296	  /* If not during reload, allocate another temp reg here for loading
4297	     in the address, so that these instructions can be optimized
4298	     properly.  */
4299	  rtx temp_reg = (! can_create_pseudo_p ()
4300			  ? reg : gen_reg_rtx (Pmode));
4301
4302	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4303	     won't get confused into thinking that these two instructions
4304	     are loading in the true address of the symbol.  If in the
4305	     future a PIC rtx exists, that should be used instead.  */
4306	  if (TARGET_ARCH64)
4307	    {
4308	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
4309	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4310	    }
4311	  else
4312	    {
4313	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
4314	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4315	    }
4316	  address = temp_reg;
4317	  gotdata_op = true;
4318	}
4319      else
4320	address = orig;
4321
4322      crtl->uses_pic_offset_table = 1;
4323      if (gotdata_op)
4324	{
4325	  if (TARGET_ARCH64)
4326	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4327							pic_offset_table_rtx,
4328							address, orig));
4329	  else
4330	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4331							pic_offset_table_rtx,
4332							address, orig));
4333	}
4334      else
4335	{
4336	  pic_ref
4337	    = gen_const_mem (Pmode,
4338			     gen_rtx_PLUS (Pmode,
4339					   pic_offset_table_rtx, address));
4340	  insn = emit_move_insn (reg, pic_ref);
4341	}
4342
4343      /* Put a REG_EQUAL note on this insn, so that it can be optimized
4344	 by loop.  */
4345      set_unique_reg_note (insn, REG_EQUAL, orig);
4346      return reg;
4347    }
4348  else if (GET_CODE (orig) == CONST)
4349    {
4350      rtx base, offset;
4351
4352      if (GET_CODE (XEXP (orig, 0)) == PLUS
4353	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4354	return orig;
4355
4356      if (reg == 0)
4357	{
4358	  gcc_assert (can_create_pseudo_p ());
4359	  reg = gen_reg_rtx (Pmode);
4360	}
4361
4362      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4363      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4364      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4365			 		     base == reg ? NULL_RTX : reg);
4366
4367      if (GET_CODE (offset) == CONST_INT)
4368	{
4369	  if (SMALL_INT (offset))
4370	    return plus_constant (Pmode, base, INTVAL (offset));
4371	  else if (can_create_pseudo_p ())
4372	    offset = force_reg (Pmode, offset);
4373	  else
4374	    /* If we reach here, then something is seriously wrong.  */
4375	    gcc_unreachable ();
4376	}
4377      return gen_rtx_PLUS (Pmode, base, offset);
4378    }
4379  else if (GET_CODE (orig) == LABEL_REF)
4380    /* ??? We ought to be checking that the register is live instead, in case
4381       it is eliminated.  */
4382    crtl->uses_pic_offset_table = 1;
4383
4384  return orig;
4385}
4386
4387/* Try machine-dependent ways of modifying an illegitimate address X
4388   to be legitimate.  If we find one, return the new, valid address.
4389
4390   OLDX is the address as it was before break_out_memory_refs was called.
4391   In some cases it is useful to look at this to decide what needs to be done.
4392
4393   MODE is the mode of the operand pointed to by X.
4394
4395   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
4396
4397static rtx
4398sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4399			  machine_mode mode)
4400{
4401  rtx orig_x = x;
4402
4403  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4404    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4405		      force_operand (XEXP (x, 0), NULL_RTX));
4406  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4407    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4408		      force_operand (XEXP (x, 1), NULL_RTX));
4409  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4410    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4411		      XEXP (x, 1));
4412  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4413    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4414		      force_operand (XEXP (x, 1), NULL_RTX));
4415
4416  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4417    return x;
4418
4419  if (sparc_tls_referenced_p (x))
4420    x = sparc_legitimize_tls_address (x);
4421  else if (flag_pic)
4422    x = sparc_legitimize_pic_address (x, NULL_RTX);
4423  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4424    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4425		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
4426  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4427    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4428		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
4429  else if (GET_CODE (x) == SYMBOL_REF
4430	   || GET_CODE (x) == CONST
4431	   || GET_CODE (x) == LABEL_REF)
4432    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4433
4434  return x;
4435}
4436
4437/* Delegitimize an address that was legitimized by the above function.  */
4438
4439static rtx
4440sparc_delegitimize_address (rtx x)
4441{
4442  x = delegitimize_mem_from_attrs (x);
4443
4444  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4445    switch (XINT (XEXP (x, 1), 1))
4446      {
4447      case UNSPEC_MOVE_PIC:
4448      case UNSPEC_TLSLE:
4449	x = XVECEXP (XEXP (x, 1), 0, 0);
4450	gcc_assert (GET_CODE (x) == SYMBOL_REF);
4451	break;
4452      default:
4453	break;
4454      }
4455
4456  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
4457  if (GET_CODE (x) == MINUS
4458      && REG_P (XEXP (x, 0))
4459      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4460      && GET_CODE (XEXP (x, 1)) == LO_SUM
4461      && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4462      && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4463    {
4464      x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4465      gcc_assert (GET_CODE (x) == LABEL_REF);
4466    }
4467
4468  return x;
4469}
4470
4471/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
4472   replace the input X, or the original X if no replacement is called for.
4473   The output parameter *WIN is 1 if the calling macro should goto WIN,
4474   0 if it should not.
4475
4476   For SPARC, we wish to handle addresses by splitting them into
4477   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4478   This cuts the number of extra insns by one.
4479
4480   Do nothing when generating PIC code and the address is a symbolic
4481   operand or requires a scratch register.  */
4482
4483rtx
4484sparc_legitimize_reload_address (rtx x, machine_mode mode,
4485				 int opnum, int type,
4486				 int ind_levels ATTRIBUTE_UNUSED, int *win)
4487{
4488  /* Decompose SImode constants into HIGH+LO_SUM.  */
4489  if (CONSTANT_P (x)
4490      && (mode != TFmode || TARGET_ARCH64)
4491      && GET_MODE (x) == SImode
4492      && GET_CODE (x) != LO_SUM
4493      && GET_CODE (x) != HIGH
4494      && sparc_cmodel <= CM_MEDLOW
4495      && !(flag_pic
4496	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4497    {
4498      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4499      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4500		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4501		   opnum, (enum reload_type)type);
4502      *win = 1;
4503      return x;
4504    }
4505
4506  /* We have to recognize what we have already generated above.  */
4507  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4508    {
4509      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4510		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4511		   opnum, (enum reload_type)type);
4512      *win = 1;
4513      return x;
4514    }
4515
4516  *win = 0;
4517  return x;
4518}
4519
4520/* Return true if ADDR (a legitimate address expression)
4521   has an effect that depends on the machine mode it is used for.
4522
4523   In PIC mode,
4524
4525      (mem:HI [%l7+a])
4526
4527   is not equivalent to
4528
4529      (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4530
4531   because [%l7+a+1] is interpreted as the address of (a+1).  */
4532
4533
4534static bool
4535sparc_mode_dependent_address_p (const_rtx addr,
4536				addr_space_t as ATTRIBUTE_UNUSED)
4537{
4538  if (flag_pic && GET_CODE (addr) == PLUS)
4539    {
4540      rtx op0 = XEXP (addr, 0);
4541      rtx op1 = XEXP (addr, 1);
4542      if (op0 == pic_offset_table_rtx
4543	  && symbolic_operand (op1, VOIDmode))
4544	return true;
4545    }
4546
4547  return false;
4548}
4549
4550#ifdef HAVE_GAS_HIDDEN
4551# define USE_HIDDEN_LINKONCE 1
4552#else
4553# define USE_HIDDEN_LINKONCE 0
4554#endif
4555
4556static void
4557get_pc_thunk_name (char name[32], unsigned int regno)
4558{
4559  const char *reg_name = reg_names[regno];
4560
4561  /* Skip the leading '%' as that cannot be used in a
4562     symbol name.  */
4563  reg_name += 1;
4564
4565  if (USE_HIDDEN_LINKONCE)
4566    sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4567  else
4568    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4569}
4570
4571/* Wrapper around the load_pcrel_sym{si,di} patterns.  */
4572
4573static rtx
4574gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4575{
4576  int orig_flag_pic = flag_pic;
4577  rtx insn;
4578
4579  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
4580  flag_pic = 0;
4581  if (TARGET_ARCH64)
4582    insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4583  else
4584    insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4585  flag_pic = orig_flag_pic;
4586
4587  return insn;
4588}
4589
4590/* Emit code to load the GOT register.  */
4591
4592void
4593load_got_register (void)
4594{
4595  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
4596  if (!global_offset_table_rtx)
4597    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4598
4599  if (TARGET_VXWORKS_RTP)
4600    emit_insn (gen_vxworks_load_got ());
4601  else
4602    {
4603      /* The GOT symbol is subject to a PC-relative relocation so we need a
4604	 helper function to add the PC value and thus get the final value.  */
4605      if (!got_helper_rtx)
4606	{
4607	  char name[32];
4608	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4609	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4610	}
4611
4612      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4613				     got_helper_rtx,
4614				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4615    }
4616
4617  /* Need to emit this whether or not we obey regdecls,
4618     since setjmp/longjmp can cause life info to screw up.
4619     ??? In the case where we don't obey regdecls, this is not sufficient
4620     since we may not fall out the bottom.  */
4621  emit_use (global_offset_table_rtx);
4622}
4623
4624/* Emit a call instruction with the pattern given by PAT.  ADDR is the
4625   address of the call target.  */
4626
4627void
4628sparc_emit_call_insn (rtx pat, rtx addr)
4629{
4630  rtx_insn *insn;
4631
4632  insn = emit_call_insn (pat);
4633
4634  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
4635  if (TARGET_VXWORKS_RTP
4636      && flag_pic
4637      && GET_CODE (addr) == SYMBOL_REF
4638      && (SYMBOL_REF_DECL (addr)
4639	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4640	  : !SYMBOL_REF_LOCAL_P (addr)))
4641    {
4642      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4643      crtl->uses_pic_offset_table = 1;
4644    }
4645}
4646
4647/* Return 1 if RTX is a MEM which is known to be aligned to at
4648   least a DESIRED byte boundary.  */
4649
4650int
4651mem_min_alignment (rtx mem, int desired)
4652{
4653  rtx addr, base, offset;
4654
4655  /* If it's not a MEM we can't accept it.  */
4656  if (GET_CODE (mem) != MEM)
4657    return 0;
4658
4659  /* Obviously...  */
4660  if (!TARGET_UNALIGNED_DOUBLES
4661      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4662    return 1;
4663
4664  /* ??? The rest of the function predates MEM_ALIGN so
4665     there is probably a bit of redundancy.  */
4666  addr = XEXP (mem, 0);
4667  base = offset = NULL_RTX;
4668  if (GET_CODE (addr) == PLUS)
4669    {
4670      if (GET_CODE (XEXP (addr, 0)) == REG)
4671	{
4672	  base = XEXP (addr, 0);
4673
4674	  /* What we are saying here is that if the base
4675	     REG is aligned properly, the compiler will make
4676	     sure any REG based index upon it will be so
4677	     as well.  */
4678	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4679	    offset = XEXP (addr, 1);
4680	  else
4681	    offset = const0_rtx;
4682	}
4683    }
4684  else if (GET_CODE (addr) == REG)
4685    {
4686      base = addr;
4687      offset = const0_rtx;
4688    }
4689
4690  if (base != NULL_RTX)
4691    {
4692      int regno = REGNO (base);
4693
4694      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4695	{
4696	  /* Check if the compiler has recorded some information
4697	     about the alignment of the base REG.  If reload has
4698	     completed, we already matched with proper alignments.
4699	     If not running global_alloc, reload might give us
4700	     unaligned pointer to local stack though.  */
4701	  if (((cfun != 0
4702		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4703	       || (optimize && reload_completed))
4704	      && (INTVAL (offset) & (desired - 1)) == 0)
4705	    return 1;
4706	}
4707      else
4708	{
4709	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4710	    return 1;
4711	}
4712    }
4713  else if (! TARGET_UNALIGNED_DOUBLES
4714	   || CONSTANT_P (addr)
4715	   || GET_CODE (addr) == LO_SUM)
4716    {
4717      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4718	 is true, in which case we can only assume that an access is aligned if
4719	 it is to a constant address, or the address involves a LO_SUM.  */
4720      return 1;
4721    }
4722
4723  /* An obviously unaligned address.  */
4724  return 0;
4725}
4726
4727
4728/* Vectors to keep interesting information about registers where it can easily
4729   be got.  We used to use the actual mode value as the bit number, but there
4730   are more than 32 modes now.  Instead we use two tables: one indexed by
4731   hard register number, and one indexed by mode.  */
4732
4733/* The purpose of sparc_mode_class is to shrink the range of modes so that
4734   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
4735   mapped into one sparc_mode_class mode.  */
4736
4737enum sparc_mode_class {
4738  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4739  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4740  CC_MODE, CCFP_MODE
4741};
4742
4743/* Modes for single-word and smaller quantities.  */
4744#define S_MODES \
4745  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4746
4747/* Modes for double-word and smaller quantities.  */
4748#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4749
4750/* Modes for quad-word and smaller quantities.  */
4751#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4752
4753/* Modes for 8-word and smaller quantities.  */
4754#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4755
4756/* Modes for single-float quantities.  */
4757#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4758
4759/* Modes for double-float and smaller quantities.  */
4760#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4761
4762/* Modes for quad-float and smaller quantities.  */
4763#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4764
4765/* Modes for quad-float pairs and smaller quantities.  */
4766#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4767
4768/* Modes for double-float only quantities.  */
4769#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4770
4771/* Modes for quad-float and double-float only quantities.  */
4772#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4773
4774/* Modes for quad-float pairs and double-float only quantities.  */
4775#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4776
4777/* Modes for condition codes.  */
4778#define CC_MODES (1 << (int) CC_MODE)
4779#define CCFP_MODES (1 << (int) CCFP_MODE)
4780
4781/* Value is 1 if register/mode pair is acceptable on sparc.
4782
4783   The funny mixture of D and T modes is because integer operations
4784   do not specially operate on tetra quantities, so non-quad-aligned
4785   registers can hold quadword quantities (except %o4 and %i4 because
4786   they cross fixed registers).
4787
4788   ??? Note that, despite the settings, non-double-aligned parameter
4789   registers can hold double-word quantities in 32-bit mode.  */
4790
4791/* This points to either the 32 bit or the 64 bit version.  */
4792const int *hard_regno_mode_classes;
4793
4794static const int hard_32bit_mode_classes[] = {
4795  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4796  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4797  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4798  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4799
4800  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4801  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4802  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4803  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4804
4805  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4806     and none can hold SFmode/SImode values.  */
4807  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4808  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4809  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4810  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4811
4812  /* %fcc[0123] */
4813  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4814
4815  /* %icc, %sfp, %gsr */
4816  CC_MODES, 0, D_MODES
4817};
4818
4819static const int hard_64bit_mode_classes[] = {
4820  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4821  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4822  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4823  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4824
4825  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4826  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4827  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4828  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4829
4830  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4831     and none can hold SFmode/SImode values.  */
4832  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4833  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4834  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4835  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4836
4837  /* %fcc[0123] */
4838  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4839
4840  /* %icc, %sfp, %gsr */
4841  CC_MODES, 0, D_MODES
4842};
4843
4844int sparc_mode_class [NUM_MACHINE_MODES];
4845
4846enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4847
4848static void
4849sparc_init_modes (void)
4850{
4851  int i;
4852
4853  for (i = 0; i < NUM_MACHINE_MODES; i++)
4854    {
4855      machine_mode m = (machine_mode) i;
4856      unsigned int size = GET_MODE_SIZE (m);
4857
4858      switch (GET_MODE_CLASS (m))
4859	{
4860	case MODE_INT:
4861	case MODE_PARTIAL_INT:
4862	case MODE_COMPLEX_INT:
4863	  if (size < 4)
4864	    sparc_mode_class[i] = 1 << (int) H_MODE;
4865	  else if (size == 4)
4866	    sparc_mode_class[i] = 1 << (int) S_MODE;
4867	  else if (size == 8)
4868	    sparc_mode_class[i] = 1 << (int) D_MODE;
4869	  else if (size == 16)
4870	    sparc_mode_class[i] = 1 << (int) T_MODE;
4871	  else if (size == 32)
4872	    sparc_mode_class[i] = 1 << (int) O_MODE;
4873	  else
4874	    sparc_mode_class[i] = 0;
4875	  break;
4876	case MODE_VECTOR_INT:
4877	  if (size == 4)
4878	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4879	  else if (size == 8)
4880	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4881	  else
4882	    sparc_mode_class[i] = 0;
4883	  break;
4884	case MODE_FLOAT:
4885	case MODE_COMPLEX_FLOAT:
4886	  if (size == 4)
4887	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4888	  else if (size == 8)
4889	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4890	  else if (size == 16)
4891	    sparc_mode_class[i] = 1 << (int) TF_MODE;
4892	  else if (size == 32)
4893	    sparc_mode_class[i] = 1 << (int) OF_MODE;
4894	  else
4895	    sparc_mode_class[i] = 0;
4896	  break;
4897	case MODE_CC:
4898	  if (m == CCFPmode || m == CCFPEmode)
4899	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4900	  else
4901	    sparc_mode_class[i] = 1 << (int) CC_MODE;
4902	  break;
4903	default:
4904	  sparc_mode_class[i] = 0;
4905	  break;
4906	}
4907    }
4908
4909  if (TARGET_ARCH64)
4910    hard_regno_mode_classes = hard_64bit_mode_classes;
4911  else
4912    hard_regno_mode_classes = hard_32bit_mode_classes;
4913
4914  /* Initialize the array used by REGNO_REG_CLASS.  */
4915  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4916    {
4917      if (i < 16 && TARGET_V8PLUS)
4918	sparc_regno_reg_class[i] = I64_REGS;
4919      else if (i < 32 || i == FRAME_POINTER_REGNUM)
4920	sparc_regno_reg_class[i] = GENERAL_REGS;
4921      else if (i < 64)
4922	sparc_regno_reg_class[i] = FP_REGS;
4923      else if (i < 96)
4924	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4925      else if (i < 100)
4926	sparc_regno_reg_class[i] = FPCC_REGS;
4927      else
4928	sparc_regno_reg_class[i] = NO_REGS;
4929    }
4930}
4931
4932/* Return whether REGNO, a global or FP register, must be saved/restored.  */
4933
4934static inline bool
4935save_global_or_fp_reg_p (unsigned int regno,
4936			 int leaf_function ATTRIBUTE_UNUSED)
4937{
4938  return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4939}
4940
4941/* Return whether the return address register (%i7) is needed.  */
4942
4943static inline bool
4944return_addr_reg_needed_p (int leaf_function)
4945{
4946  /* If it is live, for example because of __builtin_return_address (0).  */
4947  if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4948    return true;
4949
4950  /* Otherwise, it is needed as save register if %o7 is clobbered.  */
4951  if (!leaf_function
4952      /* Loading the GOT register clobbers %o7.  */
4953      || crtl->uses_pic_offset_table
4954      || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4955    return true;
4956
4957  return false;
4958}
4959
4960/* Return whether REGNO, a local or in register, must be saved/restored.  */
4961
4962static bool
4963save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4964{
4965  /* General case: call-saved registers live at some point.  */
4966  if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4967    return true;
4968
4969  /* Frame pointer register (%fp) if needed.  */
4970  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4971    return true;
4972
4973  /* Return address register (%i7) if needed.  */
4974  if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4975    return true;
4976
4977  /* GOT register (%l7) if needed.  */
4978  if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4979    return true;
4980
4981  /* If the function accesses prior frames, the frame pointer and the return
4982     address of the previous frame must be saved on the stack.  */
4983  if (crtl->accesses_prior_frames
4984      && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4985    return true;
4986
4987  return false;
4988}
4989
4990/* Compute the frame size required by the function.  This function is called
4991   during the reload pass and also by sparc_expand_prologue.  */
4992
4993HOST_WIDE_INT
4994sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4995{
4996  HOST_WIDE_INT frame_size, apparent_frame_size;
4997  int args_size, n_global_fp_regs = 0;
4998  bool save_local_in_regs_p = false;
4999  unsigned int i;
5000
5001  /* If the function allocates dynamic stack space, the dynamic offset is
5002     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
5003  if (leaf_function && !cfun->calls_alloca)
5004    args_size = 0;
5005  else
5006    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5007
5008  /* Calculate space needed for global registers.  */
5009  if (TARGET_ARCH64)
5010    {
5011      for (i = 0; i < 8; i++)
5012	if (save_global_or_fp_reg_p (i, 0))
5013	  n_global_fp_regs += 2;
5014    }
5015  else
5016    {
5017      for (i = 0; i < 8; i += 2)
5018	if (save_global_or_fp_reg_p (i, 0)
5019	    || save_global_or_fp_reg_p (i + 1, 0))
5020	  n_global_fp_regs += 2;
5021    }
5022
5023  /* In the flat window model, find out which local and in registers need to
5024     be saved.  We don't reserve space in the current frame for them as they
5025     will be spilled into the register window save area of the caller's frame.
5026     However, as soon as we use this register window save area, we must create
5027     that of the current frame to make it the live one.  */
5028  if (TARGET_FLAT)
5029    for (i = 16; i < 32; i++)
5030      if (save_local_or_in_reg_p (i, leaf_function))
5031	{
5032	 save_local_in_regs_p = true;
5033	 break;
5034	}
5035
5036  /* Calculate space needed for FP registers.  */
5037  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5038    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5039      n_global_fp_regs += 2;
5040
5041  if (size == 0
5042      && n_global_fp_regs == 0
5043      && args_size == 0
5044      && !save_local_in_regs_p)
5045    frame_size = apparent_frame_size = 0;
5046  else
5047    {
5048      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
5049      apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5050      apparent_frame_size += n_global_fp_regs * 4;
5051
5052      /* We need to add the size of the outgoing argument area.  */
5053      frame_size = apparent_frame_size + ((args_size + 7) & -8);
5054
5055      /* And that of the register window save area.  */
5056      frame_size += FIRST_PARM_OFFSET (cfun->decl);
5057
5058      /* Finally, bump to the appropriate alignment.  */
5059      frame_size = SPARC_STACK_ALIGN (frame_size);
5060    }
5061
5062  /* Set up values for use in prologue and epilogue.  */
5063  sparc_frame_size = frame_size;
5064  sparc_apparent_frame_size = apparent_frame_size;
5065  sparc_n_global_fp_regs = n_global_fp_regs;
5066  sparc_save_local_in_regs_p = save_local_in_regs_p;
5067
5068  return frame_size;
5069}
5070
5071/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */
5072
5073int
5074sparc_initial_elimination_offset (int to)
5075{
5076  int offset;
5077
5078  if (to == STACK_POINTER_REGNUM)
5079    offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5080  else
5081    offset = 0;
5082
5083  offset += SPARC_STACK_BIAS;
5084  return offset;
5085}
5086
5087/* Output any necessary .register pseudo-ops.  */
5088
5089void
5090sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5091{
5092#ifdef HAVE_AS_REGISTER_PSEUDO_OP
5093  int i;
5094
5095  if (TARGET_ARCH32)
5096    return;
5097
5098  /* Check if %g[2367] were used without
5099     .register being printed for them already.  */
5100  for (i = 2; i < 8; i++)
5101    {
5102      if (df_regs_ever_live_p (i)
5103	  && ! sparc_hard_reg_printed [i])
5104	{
5105	  sparc_hard_reg_printed [i] = 1;
5106	  /* %g7 is used as TLS base register, use #ignore
5107	     for it instead of #scratch.  */
5108	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5109		   i == 7 ? "ignore" : "scratch");
5110	}
5111      if (i == 3) i = 5;
5112    }
5113#endif
5114}
5115
5116#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5117
5118#if PROBE_INTERVAL > 4096
5119#error Cannot use indexed addressing mode for stack probing
5120#endif
5121
5122/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5123   inclusive.  These are offsets from the current stack pointer.
5124
5125   Note that we don't use the REG+REG addressing mode for the probes because
5126   of the stack bias in 64-bit mode.  And it doesn't really buy us anything
5127   so the advantages of having a single code win here.  */
5128
5129static void
5130sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5131{
5132  rtx g1 = gen_rtx_REG (Pmode, 1);
5133
5134  /* See if we have a constant small number of probes to generate.  If so,
5135     that's the easy case.  */
5136  if (size <= PROBE_INTERVAL)
5137    {
5138      emit_move_insn (g1, GEN_INT (first));
5139      emit_insn (gen_rtx_SET (VOIDmode, g1,
5140			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5141      emit_stack_probe (plus_constant (Pmode, g1, -size));
5142    }
5143
5144  /* The run-time loop is made up of 10 insns in the generic case while the
5145     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
5146  else if (size <= 5 * PROBE_INTERVAL)
5147    {
5148      HOST_WIDE_INT i;
5149
5150      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5151      emit_insn (gen_rtx_SET (VOIDmode, g1,
5152			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5153      emit_stack_probe (g1);
5154
5155      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5156	 it exceeds SIZE.  If only two probes are needed, this will not
5157	 generate any code.  Then probe at FIRST + SIZE.  */
5158      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5159	{
5160	  emit_insn (gen_rtx_SET (VOIDmode, g1,
5161				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5162	  emit_stack_probe (g1);
5163	}
5164
5165      emit_stack_probe (plus_constant (Pmode, g1,
5166				       (i - PROBE_INTERVAL) - size));
5167    }
5168
5169  /* Otherwise, do the same as above, but in a loop.  Note that we must be
5170     extra careful with variables wrapping around because we might be at
5171     the very top (or the very bottom) of the address space and we have
5172     to be able to handle this case properly; in particular, we use an
5173     equality test for the loop condition.  */
5174  else
5175    {
5176      HOST_WIDE_INT rounded_size;
5177      rtx g4 = gen_rtx_REG (Pmode, 4);
5178
5179      emit_move_insn (g1, GEN_INT (first));
5180
5181
5182      /* Step 1: round SIZE to the previous multiple of the interval.  */
5183
5184      rounded_size = size & -PROBE_INTERVAL;
5185      emit_move_insn (g4, GEN_INT (rounded_size));
5186
5187
5188      /* Step 2: compute initial and final value of the loop counter.  */
5189
5190      /* TEST_ADDR = SP + FIRST.  */
5191      emit_insn (gen_rtx_SET (VOIDmode, g1,
5192			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5193
5194      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
5195      emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
5196
5197
5198      /* Step 3: the loop
5199
5200	 while (TEST_ADDR != LAST_ADDR)
5201	   {
5202	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5203	     probe at TEST_ADDR
5204	   }
5205
5206	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5207	 until it is equal to ROUNDED_SIZE.  */
5208
5209      if (TARGET_ARCH64)
5210	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5211      else
5212	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5213
5214
5215      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5216	 that SIZE is equal to ROUNDED_SIZE.  */
5217
5218      if (size != rounded_size)
5219	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5220    }
5221
5222  /* Make sure nothing is scheduled before we are done.  */
5223  emit_insn (gen_blockage ());
5224}
5225
5226/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
5227   absolute addresses.  */
5228
5229const char *
5230output_probe_stack_range (rtx reg1, rtx reg2)
5231{
5232  static int labelno = 0;
5233  char loop_lab[32], end_lab[32];
5234  rtx xops[2];
5235
5236  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
5237  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
5238
5239  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5240
5241   /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
5242  xops[0] = reg1;
5243  xops[1] = reg2;
5244  output_asm_insn ("cmp\t%0, %1", xops);
5245  if (TARGET_ARCH64)
5246    fputs ("\tbe,pn\t%xcc,", asm_out_file);
5247  else
5248    fputs ("\tbe\t", asm_out_file);
5249  assemble_name_raw (asm_out_file, end_lab);
5250  fputc ('\n', asm_out_file);
5251
5252  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
5253  xops[1] = GEN_INT (-PROBE_INTERVAL);
5254  output_asm_insn (" add\t%0, %1, %0", xops);
5255
5256  /* Probe at TEST_ADDR and branch.  */
5257  if (TARGET_ARCH64)
5258    fputs ("\tba,pt\t%xcc,", asm_out_file);
5259  else
5260    fputs ("\tba\t", asm_out_file);
5261  assemble_name_raw (asm_out_file, loop_lab);
5262  fputc ('\n', asm_out_file);
5263  xops[1] = GEN_INT (SPARC_STACK_BIAS);
5264  output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5265
5266  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
5267
5268  return "";
5269}
5270
5271/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5272   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
5273   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
5274   is the action to be performed if SAVE_P returns true and ACTION_FALSE
5275   the action to be performed if it returns false.  Return the new offset.  */
5276
5277typedef bool (*sorr_pred_t) (unsigned int, int);
5278typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5279
5280static int
5281emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5282			   int offset, int leaf_function, sorr_pred_t save_p,
5283			   sorr_act_t action_true, sorr_act_t action_false)
5284{
5285  unsigned int i;
5286  rtx mem;
5287  rtx_insn *insn;
5288
5289  if (TARGET_ARCH64 && high <= 32)
5290    {
5291      int fp_offset = -1;
5292
5293      for (i = low; i < high; i++)
5294	{
5295	  if (save_p (i, leaf_function))
5296	    {
5297	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
5298							  base, offset));
5299	      if (action_true == SORR_SAVE)
5300		{
5301		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5302		  RTX_FRAME_RELATED_P (insn) = 1;
5303		}
5304	      else  /* action_true == SORR_RESTORE */
5305		{
5306		  /* The frame pointer must be restored last since its old
5307		     value may be used as base address for the frame.  This
5308		     is problematic in 64-bit mode only because of the lack
5309		     of double-word load instruction.  */
5310		  if (i == HARD_FRAME_POINTER_REGNUM)
5311		    fp_offset = offset;
5312		  else
5313		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
5314		}
5315	      offset += 8;
5316	    }
5317	  else if (action_false == SORR_ADVANCE)
5318	    offset += 8;
5319	}
5320
5321      if (fp_offset >= 0)
5322	{
5323	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5324	  emit_move_insn (hard_frame_pointer_rtx, mem);
5325	}
5326    }
5327  else
5328    {
5329      for (i = low; i < high; i += 2)
5330	{
5331	  bool reg0 = save_p (i, leaf_function);
5332	  bool reg1 = save_p (i + 1, leaf_function);
5333	  machine_mode mode;
5334	  int regno;
5335
5336	  if (reg0 && reg1)
5337	    {
5338	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5339	      regno = i;
5340	    }
5341	  else if (reg0)
5342	    {
5343	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5344	      regno = i;
5345	    }
5346	  else if (reg1)
5347	    {
5348	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5349	      regno = i + 1;
5350	      offset += 4;
5351	    }
5352	  else
5353	    {
5354	      if (action_false == SORR_ADVANCE)
5355		offset += 8;
5356	      continue;
5357	    }
5358
5359	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5360	  if (action_true == SORR_SAVE)
5361	    {
5362	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5363	      RTX_FRAME_RELATED_P (insn) = 1;
5364	      if (mode == DImode)
5365		{
5366		  rtx set1, set2;
5367		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5368							      offset));
5369		  set1 = gen_rtx_SET (VOIDmode, mem,
5370				      gen_rtx_REG (SImode, regno));
5371		  RTX_FRAME_RELATED_P (set1) = 1;
5372		  mem
5373		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
5374							    offset + 4));
5375		  set2 = gen_rtx_SET (VOIDmode, mem,
5376				      gen_rtx_REG (SImode, regno + 1));
5377		  RTX_FRAME_RELATED_P (set2) = 1;
5378		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5379				gen_rtx_PARALLEL (VOIDmode,
5380						  gen_rtvec (2, set1, set2)));
5381		}
5382	    }
5383	  else  /* action_true == SORR_RESTORE */
5384	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
5385
5386	  /* Always preserve double-word alignment.  */
5387	  offset = (offset + 8) & -8;
5388	}
5389    }
5390
5391  return offset;
5392}
5393
5394/* Emit code to adjust BASE to OFFSET.  Return the new base.  */
5395
5396static rtx
5397emit_adjust_base_to_offset (rtx base, int offset)
5398{
5399  /* ??? This might be optimized a little as %g1 might already have a
5400     value close enough that a single add insn will do.  */
5401  /* ??? Although, all of this is probably only a temporary fix because
5402     if %g1 can hold a function result, then sparc_expand_epilogue will
5403     lose (the result will be clobbered).  */
5404  rtx new_base = gen_rtx_REG (Pmode, 1);
5405  emit_move_insn (new_base, GEN_INT (offset));
5406  emit_insn (gen_rtx_SET (VOIDmode,
5407			  new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5408  return new_base;
5409}
5410
5411/* Emit code to save/restore call-saved global and FP registers.  */
5412
5413static void
5414emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5415{
5416  if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5417    {
5418      base = emit_adjust_base_to_offset  (base, offset);
5419      offset = 0;
5420    }
5421
5422  offset
5423    = emit_save_or_restore_regs (0, 8, base, offset, 0,
5424				 save_global_or_fp_reg_p, action, SORR_NONE);
5425  emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5426			     save_global_or_fp_reg_p, action, SORR_NONE);
5427}
5428
5429/* Emit code to save/restore call-saved local and in registers.  */
5430
5431static void
5432emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5433{
5434  if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5435    {
5436      base = emit_adjust_base_to_offset  (base, offset);
5437      offset = 0;
5438    }
5439
5440  emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5441			     save_local_or_in_reg_p, action, SORR_ADVANCE);
5442}
5443
5444/* Emit a window_save insn.  */
5445
5446static rtx_insn *
5447emit_window_save (rtx increment)
5448{
5449  rtx_insn *insn = emit_insn (gen_window_save (increment));
5450  RTX_FRAME_RELATED_P (insn) = 1;
5451
5452  /* The incoming return address (%o7) is saved in %i7.  */
5453  add_reg_note (insn, REG_CFA_REGISTER,
5454		gen_rtx_SET (VOIDmode,
5455			     gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5456			     gen_rtx_REG (Pmode,
5457					  INCOMING_RETURN_ADDR_REGNUM)));
5458
5459  /* The window save event.  */
5460  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5461
5462  /* The CFA is %fp, the hard frame pointer.  */
5463  add_reg_note (insn, REG_CFA_DEF_CFA,
5464		plus_constant (Pmode, hard_frame_pointer_rtx,
5465			       INCOMING_FRAME_SP_OFFSET));
5466
5467  return insn;
5468}
5469
5470/* Generate an increment for the stack pointer.  */
5471
5472static rtx
5473gen_stack_pointer_inc (rtx increment)
5474{
5475  return gen_rtx_SET (VOIDmode,
5476		      stack_pointer_rtx,
5477		      gen_rtx_PLUS (Pmode,
5478				    stack_pointer_rtx,
5479				    increment));
5480}
5481
5482/* Expand the function prologue.  The prologue is responsible for reserving
5483   storage for the frame, saving the call-saved registers and loading the
5484   GOT register if needed.  */
5485
5486void
5487sparc_expand_prologue (void)
5488{
5489  HOST_WIDE_INT size;
5490  rtx_insn *insn;
5491
5492  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
5493     on the final value of the flag means deferring the prologue/epilogue
5494     expansion until just before the second scheduling pass, which is too
5495     late to emit multiple epilogues or return insns.
5496
5497     Of course we are making the assumption that the value of the flag
5498     will not change between now and its final value.  Of the three parts
5499     of the formula, only the last one can reasonably vary.  Let's take a
5500     closer look, after assuming that the first two ones are set to true
5501     (otherwise the last value is effectively silenced).
5502
5503     If only_leaf_regs_used returns false, the global predicate will also
5504     be false so the actual frame size calculated below will be positive.
5505     As a consequence, the save_register_window insn will be emitted in
5506     the instruction stream; now this insn explicitly references %fp
5507     which is not a leaf register so only_leaf_regs_used will always
5508     return false subsequently.
5509
5510     If only_leaf_regs_used returns true, we hope that the subsequent
5511     optimization passes won't cause non-leaf registers to pop up.  For
5512     example, the regrename pass has special provisions to not rename to
5513     non-leaf registers in a leaf function.  */
5514  sparc_leaf_function_p
5515    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5516
5517  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5518
5519  if (flag_stack_usage_info)
5520    current_function_static_stack_size = size;
5521
5522  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5523    {
5524      if (crtl->is_leaf && !cfun->calls_alloca)
5525	{
5526	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5527	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5528					  size - STACK_CHECK_PROTECT);
5529	}
5530      else if (size > 0)
5531	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5532    }
5533
5534  if (size == 0)
5535    ; /* do nothing.  */
5536  else if (sparc_leaf_function_p)
5537    {
5538      rtx size_int_rtx = GEN_INT (-size);
5539
5540      if (size <= 4096)
5541	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5542      else if (size <= 8192)
5543	{
5544	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5545	  RTX_FRAME_RELATED_P (insn) = 1;
5546
5547	  /* %sp is still the CFA register.  */
5548	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5549	}
5550      else
5551	{
5552	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5553	  emit_move_insn (size_rtx, size_int_rtx);
5554	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5555	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5556			gen_stack_pointer_inc (size_int_rtx));
5557	}
5558
5559      RTX_FRAME_RELATED_P (insn) = 1;
5560    }
5561  else
5562    {
5563      rtx size_int_rtx = GEN_INT (-size);
5564
5565      if (size <= 4096)
5566	emit_window_save (size_int_rtx);
5567      else if (size <= 8192)
5568	{
5569	  emit_window_save (GEN_INT (-4096));
5570
5571	  /* %sp is not the CFA register anymore.  */
5572	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5573
5574	  /* Make sure no %fp-based store is issued until after the frame is
5575	     established.  The offset between the frame pointer and the stack
5576	     pointer is calculated relative to the value of the stack pointer
5577	     at the end of the function prologue, and moving instructions that
5578	     access the stack via the frame pointer between the instructions
5579	     that decrement the stack pointer could result in accessing the
5580	     register window save area, which is volatile.  */
5581	  emit_insn (gen_frame_blockage ());
5582	}
5583      else
5584	{
5585	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5586	  emit_move_insn (size_rtx, size_int_rtx);
5587	  emit_window_save (size_rtx);
5588	}
5589    }
5590
5591  if (sparc_leaf_function_p)
5592    {
5593      sparc_frame_base_reg = stack_pointer_rtx;
5594      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5595    }
5596  else
5597    {
5598      sparc_frame_base_reg = hard_frame_pointer_rtx;
5599      sparc_frame_base_offset = SPARC_STACK_BIAS;
5600    }
5601
5602  if (sparc_n_global_fp_regs > 0)
5603    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5604				         sparc_frame_base_offset
5605					   - sparc_apparent_frame_size,
5606					 SORR_SAVE);
5607
5608  /* Load the GOT register if needed.  */
5609  if (crtl->uses_pic_offset_table)
5610    load_got_register ();
5611
5612  /* Advertise that the data calculated just above are now valid.  */
5613  sparc_prologue_data_valid_p = true;
5614}
5615
5616/* Expand the function prologue.  The prologue is responsible for reserving
5617   storage for the frame, saving the call-saved registers and loading the
5618   GOT register if needed.  */
5619
5620void
5621sparc_flat_expand_prologue (void)
5622{
5623  HOST_WIDE_INT size;
5624  rtx_insn *insn;
5625
5626  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5627
5628  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5629
5630  if (flag_stack_usage_info)
5631    current_function_static_stack_size = size;
5632
5633  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5634    {
5635      if (crtl->is_leaf && !cfun->calls_alloca)
5636	{
5637	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5638	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5639					  size - STACK_CHECK_PROTECT);
5640	}
5641      else if (size > 0)
5642	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5643    }
5644
5645  if (sparc_save_local_in_regs_p)
5646    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5647					SORR_SAVE);
5648
5649  if (size == 0)
5650    ; /* do nothing.  */
5651  else
5652    {
5653      rtx size_int_rtx, size_rtx;
5654
5655      size_rtx = size_int_rtx = GEN_INT (-size);
5656
5657      /* We establish the frame (i.e. decrement the stack pointer) first, even
5658	 if we use a frame pointer, because we cannot clobber any call-saved
5659	 registers, including the frame pointer, if we haven't created a new
5660	 register save area, for the sake of compatibility with the ABI.  */
5661      if (size <= 4096)
5662	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5663      else if (size <= 8192 && !frame_pointer_needed)
5664	{
5665	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5666	  RTX_FRAME_RELATED_P (insn) = 1;
5667	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5668	}
5669      else
5670	{
5671	  size_rtx = gen_rtx_REG (Pmode, 1);
5672	  emit_move_insn (size_rtx, size_int_rtx);
5673	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5674	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5675			gen_stack_pointer_inc (size_int_rtx));
5676	}
5677      RTX_FRAME_RELATED_P (insn) = 1;
5678
5679      /* Ensure nothing is scheduled until after the frame is established.  */
5680      emit_insn (gen_blockage ());
5681
5682      if (frame_pointer_needed)
5683	{
5684	  insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5685					 gen_rtx_MINUS (Pmode,
5686							stack_pointer_rtx,
5687							size_rtx)));
5688	  RTX_FRAME_RELATED_P (insn) = 1;
5689
5690	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5691			gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
5692				     plus_constant (Pmode, stack_pointer_rtx,
5693						    size)));
5694	}
5695
5696      if (return_addr_reg_needed_p (sparc_leaf_function_p))
5697	{
5698	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5699	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5700
5701	  insn = emit_move_insn (i7, o7);
5702	  RTX_FRAME_RELATED_P (insn) = 1;
5703
5704	  add_reg_note (insn, REG_CFA_REGISTER,
5705			gen_rtx_SET (VOIDmode, i7, o7));
5706
5707	  /* Prevent this instruction from ever being considered dead,
5708	     even if this function has no epilogue.  */
5709	  emit_use (i7);
5710	}
5711    }
5712
5713  if (frame_pointer_needed)
5714    {
5715      sparc_frame_base_reg = hard_frame_pointer_rtx;
5716      sparc_frame_base_offset = SPARC_STACK_BIAS;
5717    }
5718  else
5719    {
5720      sparc_frame_base_reg = stack_pointer_rtx;
5721      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5722    }
5723
5724  if (sparc_n_global_fp_regs > 0)
5725    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5726				         sparc_frame_base_offset
5727					   - sparc_apparent_frame_size,
5728					 SORR_SAVE);
5729
5730  /* Load the GOT register if needed.  */
5731  if (crtl->uses_pic_offset_table)
5732    load_got_register ();
5733
5734  /* Advertise that the data calculated just above are now valid.  */
5735  sparc_prologue_data_valid_p = true;
5736}
5737
5738/* This function generates the assembly code for function entry, which boils
5739   down to emitting the necessary .register directives.  */
5740
5741static void
5742sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5743{
5744  /* Check that the assumption we made in sparc_expand_prologue is valid.  */
5745  if (!TARGET_FLAT)
5746    gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5747
5748  sparc_output_scratch_registers (file);
5749}
5750
5751/* Expand the function epilogue, either normal or part of a sibcall.
5752   We emit all the instructions except the return or the call.  */
5753
5754void
5755sparc_expand_epilogue (bool for_eh)
5756{
5757  HOST_WIDE_INT size = sparc_frame_size;
5758
5759  if (sparc_n_global_fp_regs > 0)
5760    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5761				         sparc_frame_base_offset
5762					   - sparc_apparent_frame_size,
5763					 SORR_RESTORE);
5764
5765  if (size == 0 || for_eh)
5766    ; /* do nothing.  */
5767  else if (sparc_leaf_function_p)
5768    {
5769      if (size <= 4096)
5770	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5771      else if (size <= 8192)
5772	{
5773	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5774	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5775	}
5776      else
5777	{
5778	  rtx reg = gen_rtx_REG (Pmode, 1);
5779	  emit_move_insn (reg, GEN_INT (size));
5780	  emit_insn (gen_stack_pointer_inc (reg));
5781	}
5782    }
5783}
5784
5785/* Expand the function epilogue, either normal or part of a sibcall.
5786   We emit all the instructions except the return or the call.  */
5787
5788void
5789sparc_flat_expand_epilogue (bool for_eh)
5790{
5791  HOST_WIDE_INT size = sparc_frame_size;
5792
5793  if (sparc_n_global_fp_regs > 0)
5794    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5795				         sparc_frame_base_offset
5796					   - sparc_apparent_frame_size,
5797					 SORR_RESTORE);
5798
5799  /* If we have a frame pointer, we'll need both to restore it before the
5800     frame is destroyed and use its current value in destroying the frame.
5801     Since we don't have an atomic way to do that in the flat window model,
5802     we save the current value into a temporary register (%g1).  */
5803  if (frame_pointer_needed && !for_eh)
5804    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5805
5806  if (return_addr_reg_needed_p (sparc_leaf_function_p))
5807    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5808		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5809
5810  if (sparc_save_local_in_regs_p)
5811    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5812					sparc_frame_base_offset,
5813					SORR_RESTORE);
5814
5815  if (size == 0 || for_eh)
5816    ; /* do nothing.  */
5817  else if (frame_pointer_needed)
5818    {
5819      /* Make sure the frame is destroyed after everything else is done.  */
5820      emit_insn (gen_blockage ());
5821
5822      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5823    }
5824  else
5825    {
5826      /* Likewise.  */
5827      emit_insn (gen_blockage ());
5828
5829      if (size <= 4096)
5830	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5831      else if (size <= 8192)
5832	{
5833	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5834	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5835	}
5836      else
5837	{
5838	  rtx reg = gen_rtx_REG (Pmode, 1);
5839	  emit_move_insn (reg, GEN_INT (size));
5840	  emit_insn (gen_stack_pointer_inc (reg));
5841	}
5842    }
5843}
5844
5845/* Return true if it is appropriate to emit `return' instructions in the
5846   body of a function.  */
5847
5848bool
5849sparc_can_use_return_insn_p (void)
5850{
5851  return sparc_prologue_data_valid_p
5852	 && sparc_n_global_fp_regs == 0
5853	 && TARGET_FLAT
5854	    ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5855	    : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5856}
5857
5858/* This function generates the assembly code for function exit.  */
5859
5860static void
5861sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5862{
5863  /* If the last two instructions of a function are "call foo; dslot;"
5864     the return address might point to the first instruction in the next
5865     function and we have to output a dummy nop for the sake of sane
5866     backtraces in such cases.  This is pointless for sibling calls since
5867     the return address is explicitly adjusted.  */
5868
5869  rtx insn, last_real_insn;
5870
5871  insn = get_last_insn ();
5872
5873  last_real_insn = prev_real_insn (insn);
5874  if (last_real_insn
5875      && NONJUMP_INSN_P (last_real_insn)
5876      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5877    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5878
5879  if (last_real_insn
5880      && CALL_P (last_real_insn)
5881      && !SIBLING_CALL_P (last_real_insn))
5882    fputs("\tnop\n", file);
5883
5884  sparc_output_deferred_case_vectors ();
5885}
5886
5887/* Output a 'restore' instruction.  */
5888
5889static void
5890output_restore (rtx pat)
5891{
5892  rtx operands[3];
5893
5894  if (! pat)
5895    {
5896      fputs ("\t restore\n", asm_out_file);
5897      return;
5898    }
5899
5900  gcc_assert (GET_CODE (pat) == SET);
5901
5902  operands[0] = SET_DEST (pat);
5903  pat = SET_SRC (pat);
5904
5905  switch (GET_CODE (pat))
5906    {
5907      case PLUS:
5908	operands[1] = XEXP (pat, 0);
5909	operands[2] = XEXP (pat, 1);
5910	output_asm_insn (" restore %r1, %2, %Y0", operands);
5911	break;
5912      case LO_SUM:
5913	operands[1] = XEXP (pat, 0);
5914	operands[2] = XEXP (pat, 1);
5915	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5916	break;
5917      case ASHIFT:
5918	operands[1] = XEXP (pat, 0);
5919	gcc_assert (XEXP (pat, 1) == const1_rtx);
5920	output_asm_insn (" restore %r1, %r1, %Y0", operands);
5921	break;
5922      default:
5923	operands[1] = pat;
5924	output_asm_insn (" restore %%g0, %1, %Y0", operands);
5925	break;
5926    }
5927}
5928
5929/* Output a return.  */
5930
5931const char *
5932output_return (rtx_insn *insn)
5933{
5934  if (crtl->calls_eh_return)
5935    {
5936      /* If the function uses __builtin_eh_return, the eh_return
5937	 machinery occupies the delay slot.  */
5938      gcc_assert (!final_sequence);
5939
5940      if (flag_delayed_branch)
5941	{
5942	  if (!TARGET_FLAT && TARGET_V9)
5943	    fputs ("\treturn\t%i7+8\n", asm_out_file);
5944	  else
5945	    {
5946	      if (!TARGET_FLAT)
5947		fputs ("\trestore\n", asm_out_file);
5948
5949	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
5950	    }
5951
5952	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5953	}
5954      else
5955	{
5956	  if (!TARGET_FLAT)
5957	    fputs ("\trestore\n", asm_out_file);
5958
5959	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5960	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5961	}
5962    }
5963  else if (sparc_leaf_function_p || TARGET_FLAT)
5964    {
5965      /* This is a leaf or flat function so we don't have to bother restoring
5966	 the register window, which frees us from dealing with the convoluted
5967	 semantics of restore/return.  We simply output the jump to the
5968	 return address and the insn in the delay slot (if any).  */
5969
5970      return "jmp\t%%o7+%)%#";
5971    }
5972  else
5973    {
5974      /* This is a regular function so we have to restore the register window.
5975	 We may have a pending insn for the delay slot, which will be either
5976	 combined with the 'restore' instruction or put in the delay slot of
5977	 the 'return' instruction.  */
5978
5979      if (final_sequence)
5980	{
5981	  rtx delay, pat;
5982
5983	  delay = NEXT_INSN (insn);
5984	  gcc_assert (delay);
5985
5986	  pat = PATTERN (delay);
5987
5988	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
5989	    {
5990	      epilogue_renumber (&pat, 0);
5991	      return "return\t%%i7+%)%#";
5992	    }
5993	  else
5994	    {
5995	      output_asm_insn ("jmp\t%%i7+%)", NULL);
5996	      output_restore (pat);
5997	      PATTERN (delay) = gen_blockage ();
5998	      INSN_CODE (delay) = -1;
5999	    }
6000	}
6001      else
6002        {
6003	  /* The delay slot is empty.  */
6004	  if (TARGET_V9)
6005	    return "return\t%%i7+%)\n\t nop";
6006	  else if (flag_delayed_branch)
6007	    return "jmp\t%%i7+%)\n\t restore";
6008	  else
6009	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
6010	}
6011    }
6012
6013  return "";
6014}
6015
6016/* Output a sibling call.  */
6017
6018const char *
6019output_sibcall (rtx_insn *insn, rtx call_operand)
6020{
6021  rtx operands[1];
6022
6023  gcc_assert (flag_delayed_branch);
6024
6025  operands[0] = call_operand;
6026
6027  if (sparc_leaf_function_p || TARGET_FLAT)
6028    {
6029      /* This is a leaf or flat function so we don't have to bother restoring
6030	 the register window.  We simply output the jump to the function and
6031	 the insn in the delay slot (if any).  */
6032
6033      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6034
6035      if (final_sequence)
6036	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6037			 operands);
6038      else
6039	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6040	   it into branch if possible.  */
6041	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6042			 operands);
6043    }
6044  else
6045    {
6046      /* This is a regular function so we have to restore the register window.
6047	 We may have a pending insn for the delay slot, which will be combined
6048	 with the 'restore' instruction.  */
6049
6050      output_asm_insn ("call\t%a0, 0", operands);
6051
6052      if (final_sequence)
6053	{
6054	  rtx_insn *delay = NEXT_INSN (insn);
6055	  gcc_assert (delay);
6056
6057	  output_restore (PATTERN (delay));
6058
6059	  PATTERN (delay) = gen_blockage ();
6060	  INSN_CODE (delay) = -1;
6061	}
6062      else
6063	output_restore (NULL_RTX);
6064    }
6065
6066  return "";
6067}
6068
6069/* Functions for handling argument passing.
6070
6071   For 32-bit, the first 6 args are normally in registers and the rest are
6072   pushed.  Any arg that starts within the first 6 words is at least
6073   partially passed in a register unless its data type forbids.
6074
6075   For 64-bit, the argument registers are laid out as an array of 16 elements
6076   and arguments are added sequentially.  The first 6 int args and up to the
6077   first 16 fp args (depending on size) are passed in regs.
6078
6079   Slot    Stack   Integral   Float   Float in structure   Double   Long Double
6080   ----    -----   --------   -----   ------------------   ------   -----------
6081    15   [SP+248]              %f31       %f30,%f31         %d30
6082    14   [SP+240]              %f29       %f28,%f29         %d28       %q28
6083    13   [SP+232]              %f27       %f26,%f27         %d26
6084    12   [SP+224]              %f25       %f24,%f25         %d24       %q24
6085    11   [SP+216]              %f23       %f22,%f23         %d22
6086    10   [SP+208]              %f21       %f20,%f21         %d20       %q20
6087     9   [SP+200]              %f19       %f18,%f19         %d18
6088     8   [SP+192]              %f17       %f16,%f17         %d16       %q16
6089     7   [SP+184]              %f15       %f14,%f15         %d14
6090     6   [SP+176]              %f13       %f12,%f13         %d12       %q12
6091     5   [SP+168]     %o5      %f11       %f10,%f11         %d10
6092     4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
6093     3   [SP+152]     %o3       %f7        %f6,%f7           %d6
6094     2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
6095     1   [SP+136]     %o1       %f3        %f2,%f3           %d2
6096     0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
6097
6098   Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6099
6100   Integral arguments are always passed as 64-bit quantities appropriately
6101   extended.
6102
6103   Passing of floating point values is handled as follows.
6104   If a prototype is in scope:
6105     If the value is in a named argument (i.e. not a stdarg function or a
6106     value not part of the `...') then the value is passed in the appropriate
6107     fp reg.
6108     If the value is part of the `...' and is passed in one of the first 6
6109     slots then the value is passed in the appropriate int reg.
6110     If the value is part of the `...' and is not passed in one of the first 6
6111     slots then the value is passed in memory.
6112   If a prototype is not in scope:
6113     If the value is one of the first 6 arguments the value is passed in the
6114     appropriate integer reg and the appropriate fp reg.
6115     If the value is not one of the first 6 arguments the value is passed in
6116     the appropriate fp reg and in memory.
6117
6118
6119   Summary of the calling conventions implemented by GCC on the SPARC:
6120
6121   32-bit ABI:
6122                                size      argument     return value
6123
6124      small integer              <4       int. reg.      int. reg.
6125      word                        4       int. reg.      int. reg.
6126      double word                 8       int. reg.      int. reg.
6127
6128      _Complex small integer     <8       int. reg.      int. reg.
6129      _Complex word               8       int. reg.      int. reg.
6130      _Complex double word       16        memory        int. reg.
6131
6132      vector integer            <=8       int. reg.       FP reg.
6133      vector integer             >8        memory         memory
6134
6135      float                       4       int. reg.       FP reg.
6136      double                      8       int. reg.       FP reg.
6137      long double                16        memory         memory
6138
6139      _Complex float              8        memory         FP reg.
6140      _Complex double            16        memory         FP reg.
6141      _Complex long double       32        memory         FP reg.
6142
6143      vector float              any        memory         memory
6144
6145      aggregate                 any        memory         memory
6146
6147
6148
6149    64-bit ABI:
6150                                size      argument     return value
6151
6152      small integer              <8       int. reg.      int. reg.
6153      word                        8       int. reg.      int. reg.
6154      double word                16       int. reg.      int. reg.
6155
6156      _Complex small integer    <16       int. reg.      int. reg.
6157      _Complex word              16       int. reg.      int. reg.
6158      _Complex double word       32        memory        int. reg.
6159
6160      vector integer           <=16        FP reg.        FP reg.
6161      vector integer       16<s<=32        memory         FP reg.
6162      vector integer            >32        memory         memory
6163
6164      float                       4        FP reg.        FP reg.
6165      double                      8        FP reg.        FP reg.
6166      long double                16        FP reg.        FP reg.
6167
6168      _Complex float              8        FP reg.        FP reg.
6169      _Complex double            16        FP reg.        FP reg.
6170      _Complex long double       32        memory         FP reg.
6171
6172      vector float             <=16        FP reg.        FP reg.
6173      vector float         16<s<=32        memory         FP reg.
6174      vector float              >32        memory         memory
6175
6176      aggregate                <=16         reg.           reg.
6177      aggregate            16<s<=32        memory          reg.
6178      aggregate                 >32        memory         memory
6179
6180
6181
6182Note #1: complex floating-point types follow the extended SPARC ABIs as
6183implemented by the Sun compiler.
6184
6185Note #2: integral vector types follow the scalar floating-point types
6186conventions to match what is implemented by the Sun VIS SDK.
6187
6188Note #3: floating-point vector types follow the aggregate types
6189conventions.  */
6190
6191
6192/* Maximum number of int regs for args.  */
6193#define SPARC_INT_ARG_MAX 6
6194/* Maximum number of fp regs for args.  */
6195#define SPARC_FP_ARG_MAX 16
6196
6197#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
6198
6199/* Handle the INIT_CUMULATIVE_ARGS macro.
6200   Initialize a variable CUM of type CUMULATIVE_ARGS
6201   for a call to a function whose data type is FNTYPE.
6202   For a library call, FNTYPE is 0.  */
6203
6204void
6205init_cumulative_args (struct sparc_args *cum, tree fntype,
6206		      rtx libname ATTRIBUTE_UNUSED,
6207		      tree fndecl ATTRIBUTE_UNUSED)
6208{
6209  cum->words = 0;
6210  cum->prototype_p = fntype && prototype_p (fntype);
6211  cum->libcall_p = fntype == 0;
6212}
6213
6214/* Handle promotion of pointer and integer arguments.  */
6215
6216static machine_mode
6217sparc_promote_function_mode (const_tree type,
6218                             machine_mode mode,
6219                             int *punsignedp,
6220                             const_tree fntype ATTRIBUTE_UNUSED,
6221                             int for_return ATTRIBUTE_UNUSED)
6222{
6223  if (type != NULL_TREE && POINTER_TYPE_P (type))
6224    {
6225      *punsignedp = POINTERS_EXTEND_UNSIGNED;
6226      return Pmode;
6227    }
6228
6229  /* Integral arguments are passed as full words, as per the ABI.  */
6230  if (GET_MODE_CLASS (mode) == MODE_INT
6231      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6232    return word_mode;
6233
6234  return mode;
6235}
6236
6237/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
6238
6239static bool
6240sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6241{
6242  return TARGET_ARCH64 ? true : false;
6243}
6244
6245/* Scan the record type TYPE and return the following predicates:
6246    - INTREGS_P: the record contains at least one field or sub-field
6247      that is eligible for promotion in integer registers.
6248    - FP_REGS_P: the record contains at least one field or sub-field
6249      that is eligible for promotion in floating-point registers.
6250    - PACKED_P: the record contains at least one field that is packed.
6251
6252   Sub-fields are not taken into account for the PACKED_P predicate.  */
6253
6254static void
6255scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6256		  int *packed_p)
6257{
6258  tree field;
6259
6260  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6261    {
6262      if (TREE_CODE (field) == FIELD_DECL)
6263	{
6264	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6265	    scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6266	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6267		   || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6268		  && TARGET_FPU)
6269	    *fpregs_p = 1;
6270	  else
6271	    *intregs_p = 1;
6272
6273	  if (packed_p && DECL_PACKED (field))
6274	    *packed_p = 1;
6275	}
6276    }
6277}
6278
6279/* Compute the slot number to pass an argument in.
6280   Return the slot number or -1 if passing on the stack.
6281
6282   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6283    the preceding args and about the function being called.
6284   MODE is the argument's machine mode.
6285   TYPE is the data type of the argument (as a tree).
6286    This is null for libcalls where that information may
6287    not be available.
6288   NAMED is nonzero if this argument is a named parameter
6289    (otherwise it is an extra parameter matching an ellipsis).
6290   INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6291   *PREGNO records the register number to use if scalar type.
6292   *PPADDING records the amount of padding needed in words.  */
6293
6294static int
6295function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6296		     const_tree type, bool named, bool incoming_p,
6297		     int *pregno, int *ppadding)
6298{
6299  int regbase = (incoming_p
6300		 ? SPARC_INCOMING_INT_ARG_FIRST
6301		 : SPARC_OUTGOING_INT_ARG_FIRST);
6302  int slotno = cum->words;
6303  enum mode_class mclass;
6304  int regno;
6305
6306  *ppadding = 0;
6307
6308  if (type && TREE_ADDRESSABLE (type))
6309    return -1;
6310
6311  if (TARGET_ARCH32
6312      && mode == BLKmode
6313      && type
6314      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6315    return -1;
6316
6317  /* For SPARC64, objects requiring 16-byte alignment get it.  */
6318  if (TARGET_ARCH64
6319      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6320      && (slotno & 1) != 0)
6321    slotno++, *ppadding = 1;
6322
6323  mclass = GET_MODE_CLASS (mode);
6324  if (type && TREE_CODE (type) == VECTOR_TYPE)
6325    {
6326      /* Vector types deserve special treatment because they are
6327	 polymorphic wrt their mode, depending upon whether VIS
6328	 instructions are enabled.  */
6329      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6330	{
6331	  /* The SPARC port defines no floating-point vector modes.  */
6332	  gcc_assert (mode == BLKmode);
6333	}
6334      else
6335	{
6336	  /* Integral vector types should either have a vector
6337	     mode or an integral mode, because we are guaranteed
6338	     by pass_by_reference that their size is not greater
6339	     than 16 bytes and TImode is 16-byte wide.  */
6340	  gcc_assert (mode != BLKmode);
6341
6342	  /* Vector integers are handled like floats according to
6343	     the Sun VIS SDK.  */
6344	  mclass = MODE_FLOAT;
6345	}
6346    }
6347
6348  switch (mclass)
6349    {
6350    case MODE_FLOAT:
6351    case MODE_COMPLEX_FLOAT:
6352    case MODE_VECTOR_INT:
6353      if (TARGET_ARCH64 && TARGET_FPU && named)
6354	{
6355	  if (slotno >= SPARC_FP_ARG_MAX)
6356	    return -1;
6357	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
6358	  /* Arguments filling only one single FP register are
6359	     right-justified in the outer double FP register.  */
6360	  if (GET_MODE_SIZE (mode) <= 4)
6361	    regno++;
6362	  break;
6363	}
6364      /* fallthrough */
6365
6366    case MODE_INT:
6367    case MODE_COMPLEX_INT:
6368      if (slotno >= SPARC_INT_ARG_MAX)
6369	return -1;
6370      regno = regbase + slotno;
6371      break;
6372
6373    case MODE_RANDOM:
6374      if (mode == VOIDmode)
6375	/* MODE is VOIDmode when generating the actual call.  */
6376	return -1;
6377
6378      gcc_assert (mode == BLKmode);
6379
6380      if (TARGET_ARCH32
6381	  || !type
6382	  || (TREE_CODE (type) != VECTOR_TYPE
6383	      && TREE_CODE (type) != RECORD_TYPE))
6384	{
6385	  if (slotno >= SPARC_INT_ARG_MAX)
6386	    return -1;
6387	  regno = regbase + slotno;
6388	}
6389      else  /* TARGET_ARCH64 && type */
6390	{
6391	  int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6392
6393	  /* First see what kinds of registers we would need.  */
6394	  if (TREE_CODE (type) == VECTOR_TYPE)
6395	    fpregs_p = 1;
6396	  else
6397	    scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6398
6399	  /* The ABI obviously doesn't specify how packed structures
6400	     are passed.  These are defined to be passed in int regs
6401	     if possible, otherwise memory.  */
6402	  if (packed_p || !named)
6403	    fpregs_p = 0, intregs_p = 1;
6404
6405	  /* If all arg slots are filled, then must pass on stack.  */
6406	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6407	    return -1;
6408
6409	  /* If there are only int args and all int arg slots are filled,
6410	     then must pass on stack.  */
6411	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6412	    return -1;
6413
6414	  /* Note that even if all int arg slots are filled, fp members may
6415	     still be passed in regs if such regs are available.
6416	     *PREGNO isn't set because there may be more than one, it's up
6417	     to the caller to compute them.  */
6418	  return slotno;
6419	}
6420      break;
6421
6422    default :
6423      gcc_unreachable ();
6424    }
6425
6426  *pregno = regno;
6427  return slotno;
6428}
6429
6430/* Handle recursive register counting for structure field layout.  */
6431
6432struct function_arg_record_value_parms
6433{
6434  rtx ret;		/* return expression being built.  */
6435  int slotno;		/* slot number of the argument.  */
6436  int named;		/* whether the argument is named.  */
6437  int regbase;		/* regno of the base register.  */
6438  int stack;		/* 1 if part of the argument is on the stack.  */
6439  int intoffset;	/* offset of the first pending integer field.  */
6440  unsigned int nregs;	/* number of words passed in registers.  */
6441};
6442
6443static void function_arg_record_value_3
6444 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6445static void function_arg_record_value_2
6446 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6447static void function_arg_record_value_1
6448 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6449static rtx function_arg_record_value (const_tree, machine_mode, int, int, int);
6450static rtx function_arg_union_value (int, machine_mode, int, int);
6451
6452/* A subroutine of function_arg_record_value.  Traverse the structure
6453   recursively and determine how many registers will be required.  */
6454
6455static void
6456function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6457			     struct function_arg_record_value_parms *parms,
6458			     bool packed_p)
6459{
6460  tree field;
6461
6462  /* We need to compute how many registers are needed so we can
6463     allocate the PARALLEL but before we can do that we need to know
6464     whether there are any packed fields.  The ABI obviously doesn't
6465     specify how structures are passed in this case, so they are
6466     defined to be passed in int regs if possible, otherwise memory,
6467     regardless of whether there are fp values present.  */
6468
6469  if (! packed_p)
6470    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6471      {
6472	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6473	  {
6474	    packed_p = true;
6475	    break;
6476	  }
6477      }
6478
6479  /* Compute how many registers we need.  */
6480  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6481    {
6482      if (TREE_CODE (field) == FIELD_DECL)
6483	{
6484	  HOST_WIDE_INT bitpos = startbitpos;
6485
6486	  if (DECL_SIZE (field) != 0)
6487	    {
6488	      if (integer_zerop (DECL_SIZE (field)))
6489		continue;
6490
6491	      if (tree_fits_uhwi_p (bit_position (field)))
6492		bitpos += int_bit_position (field);
6493	    }
6494
6495	  /* ??? FIXME: else assume zero offset.  */
6496
6497	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6498	    function_arg_record_value_1 (TREE_TYPE (field),
6499	    				 bitpos,
6500					 parms,
6501					 packed_p);
6502	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6503		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6504		   && TARGET_FPU
6505		   && parms->named
6506		   && ! packed_p)
6507	    {
6508	      if (parms->intoffset != -1)
6509		{
6510		  unsigned int startbit, endbit;
6511		  int intslots, this_slotno;
6512
6513		  startbit = parms->intoffset & -BITS_PER_WORD;
6514		  endbit   = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6515
6516		  intslots = (endbit - startbit) / BITS_PER_WORD;
6517		  this_slotno = parms->slotno + parms->intoffset
6518		    / BITS_PER_WORD;
6519
6520		  if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6521		    {
6522		      intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6523		      /* We need to pass this field on the stack.  */
6524		      parms->stack = 1;
6525		    }
6526
6527		  parms->nregs += intslots;
6528		  parms->intoffset = -1;
6529		}
6530
6531	      /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6532		 If it wasn't true we wouldn't be here.  */
6533	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6534		  && DECL_MODE (field) == BLKmode)
6535		parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6536	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6537		parms->nregs += 2;
6538	      else
6539		parms->nregs += 1;
6540	    }
6541	  else
6542	    {
6543	      if (parms->intoffset == -1)
6544		parms->intoffset = bitpos;
6545	    }
6546	}
6547    }
6548}
6549
6550/* A subroutine of function_arg_record_value.  Assign the bits of the
6551   structure between parms->intoffset and bitpos to integer registers.  */
6552
6553static void
6554function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6555			     struct function_arg_record_value_parms *parms)
6556{
6557  machine_mode mode;
6558  unsigned int regno;
6559  unsigned int startbit, endbit;
6560  int this_slotno, intslots, intoffset;
6561  rtx reg;
6562
6563  if (parms->intoffset == -1)
6564    return;
6565
6566  intoffset = parms->intoffset;
6567  parms->intoffset = -1;
6568
6569  startbit = intoffset & -BITS_PER_WORD;
6570  endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6571  intslots = (endbit - startbit) / BITS_PER_WORD;
6572  this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6573
6574  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6575  if (intslots <= 0)
6576    return;
6577
6578  /* If this is the trailing part of a word, only load that much into
6579     the register.  Otherwise load the whole register.  Note that in
6580     the latter case we may pick up unwanted bits.  It's not a problem
6581     at the moment but may wish to revisit.  */
6582
6583  if (intoffset % BITS_PER_WORD != 0)
6584    mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6585			  	   MODE_INT);
6586  else
6587    mode = word_mode;
6588
6589  intoffset /= BITS_PER_UNIT;
6590  do
6591    {
6592      regno = parms->regbase + this_slotno;
6593      reg = gen_rtx_REG (mode, regno);
6594      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6595	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6596
6597      this_slotno += 1;
6598      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6599      mode = word_mode;
6600      parms->nregs += 1;
6601      intslots -= 1;
6602    }
6603  while (intslots > 0);
6604}
6605
6606/* A subroutine of function_arg_record_value.  Traverse the structure
6607   recursively and assign bits to floating point registers.  Track which
6608   bits in between need integer registers; invoke function_arg_record_value_3
6609   to make that happen.  */
6610
6611static void
6612function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6613			     struct function_arg_record_value_parms *parms,
6614			     bool packed_p)
6615{
6616  tree field;
6617
6618  if (! packed_p)
6619    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6620      {
6621	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6622	  {
6623	    packed_p = true;
6624	    break;
6625	  }
6626      }
6627
6628  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6629    {
6630      if (TREE_CODE (field) == FIELD_DECL)
6631	{
6632	  HOST_WIDE_INT bitpos = startbitpos;
6633
6634	  if (DECL_SIZE (field) != 0)
6635	    {
6636	      if (integer_zerop (DECL_SIZE (field)))
6637		continue;
6638
6639	      if (tree_fits_uhwi_p (bit_position (field)))
6640		bitpos += int_bit_position (field);
6641	    }
6642
6643	  /* ??? FIXME: else assume zero offset.  */
6644
6645	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6646	    function_arg_record_value_2 (TREE_TYPE (field),
6647	    				 bitpos,
6648					 parms,
6649					 packed_p);
6650	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6651		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6652		   && TARGET_FPU
6653		   && parms->named
6654		   && ! packed_p)
6655	    {
6656	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6657	      int regno, nregs, pos;
6658	      machine_mode mode = DECL_MODE (field);
6659	      rtx reg;
6660
6661	      function_arg_record_value_3 (bitpos, parms);
6662
6663	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6664		  && mode == BLKmode)
6665	        {
6666		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6667		  nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6668		}
6669	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6670	        {
6671		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6672		  nregs = 2;
6673		}
6674	      else
6675	        nregs = 1;
6676
6677	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6678	      if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6679		regno++;
6680	      reg = gen_rtx_REG (mode, regno);
6681	      pos = bitpos / BITS_PER_UNIT;
6682	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6683		= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6684	      parms->nregs += 1;
6685	      while (--nregs > 0)
6686		{
6687		  regno += GET_MODE_SIZE (mode) / 4;
6688	  	  reg = gen_rtx_REG (mode, regno);
6689		  pos += GET_MODE_SIZE (mode);
6690		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6691		    = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6692		  parms->nregs += 1;
6693		}
6694	    }
6695	  else
6696	    {
6697	      if (parms->intoffset == -1)
6698		parms->intoffset = bitpos;
6699	    }
6700	}
6701    }
6702}
6703
6704/* Used by function_arg and sparc_function_value_1 to implement the complex
6705   conventions of the 64-bit ABI for passing and returning structures.
6706   Return an expression valid as a return value for the FUNCTION_ARG
6707   and TARGET_FUNCTION_VALUE.
6708
6709   TYPE is the data type of the argument (as a tree).
6710    This is null for libcalls where that information may
6711    not be available.
6712   MODE is the argument's machine mode.
6713   SLOTNO is the index number of the argument's slot in the parameter array.
6714   NAMED is nonzero if this argument is a named parameter
6715    (otherwise it is an extra parameter matching an ellipsis).
6716   REGBASE is the regno of the base register for the parameter array.  */
6717
6718static rtx
6719function_arg_record_value (const_tree type, machine_mode mode,
6720			   int slotno, int named, int regbase)
6721{
6722  HOST_WIDE_INT typesize = int_size_in_bytes (type);
6723  struct function_arg_record_value_parms parms;
6724  unsigned int nregs;
6725
6726  parms.ret = NULL_RTX;
6727  parms.slotno = slotno;
6728  parms.named = named;
6729  parms.regbase = regbase;
6730  parms.stack = 0;
6731
6732  /* Compute how many registers we need.  */
6733  parms.nregs = 0;
6734  parms.intoffset = 0;
6735  function_arg_record_value_1 (type, 0, &parms, false);
6736
6737  /* Take into account pending integer fields.  */
6738  if (parms.intoffset != -1)
6739    {
6740      unsigned int startbit, endbit;
6741      int intslots, this_slotno;
6742
6743      startbit = parms.intoffset & -BITS_PER_WORD;
6744      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6745      intslots = (endbit - startbit) / BITS_PER_WORD;
6746      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6747
6748      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6749        {
6750	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6751	  /* We need to pass this field on the stack.  */
6752	  parms.stack = 1;
6753        }
6754
6755      parms.nregs += intslots;
6756    }
6757  nregs = parms.nregs;
6758
6759  /* Allocate the vector and handle some annoying special cases.  */
6760  if (nregs == 0)
6761    {
6762      /* ??? Empty structure has no value?  Duh?  */
6763      if (typesize <= 0)
6764	{
6765	  /* Though there's nothing really to store, return a word register
6766	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
6767	     leads to breakage due to the fact that there are zero bytes to
6768	     load.  */
6769	  return gen_rtx_REG (mode, regbase);
6770	}
6771      else
6772	{
6773	  /* ??? C++ has structures with no fields, and yet a size.  Give up
6774	     for now and pass everything back in integer registers.  */
6775	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6776	}
6777      if (nregs + slotno > SPARC_INT_ARG_MAX)
6778	nregs = SPARC_INT_ARG_MAX - slotno;
6779    }
6780  gcc_assert (nregs != 0);
6781
6782  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6783
6784  /* If at least one field must be passed on the stack, generate
6785     (parallel [(expr_list (nil) ...) ...]) so that all fields will
6786     also be passed on the stack.  We can't do much better because the
6787     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6788     of structures for which the fields passed exclusively in registers
6789     are not at the beginning of the structure.  */
6790  if (parms.stack)
6791    XVECEXP (parms.ret, 0, 0)
6792      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6793
6794  /* Fill in the entries.  */
6795  parms.nregs = 0;
6796  parms.intoffset = 0;
6797  function_arg_record_value_2 (type, 0, &parms, false);
6798  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6799
6800  gcc_assert (parms.nregs == nregs);
6801
6802  return parms.ret;
6803}
6804
6805/* Used by function_arg and sparc_function_value_1 to implement the conventions
6806   of the 64-bit ABI for passing and returning unions.
6807   Return an expression valid as a return value for the FUNCTION_ARG
6808   and TARGET_FUNCTION_VALUE.
6809
6810   SIZE is the size in bytes of the union.
6811   MODE is the argument's machine mode.
6812   REGNO is the hard register the union will be passed in.  */
6813
6814static rtx
6815function_arg_union_value (int size, machine_mode mode, int slotno,
6816			  int regno)
6817{
6818  int nwords = ROUND_ADVANCE (size), i;
6819  rtx regs;
6820
6821  /* See comment in previous function for empty structures.  */
6822  if (nwords == 0)
6823    return gen_rtx_REG (mode, regno);
6824
6825  if (slotno == SPARC_INT_ARG_MAX - 1)
6826    nwords = 1;
6827
6828  regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6829
6830  for (i = 0; i < nwords; i++)
6831    {
6832      /* Unions are passed left-justified.  */
6833      XVECEXP (regs, 0, i)
6834	= gen_rtx_EXPR_LIST (VOIDmode,
6835			     gen_rtx_REG (word_mode, regno),
6836			     GEN_INT (UNITS_PER_WORD * i));
6837      regno++;
6838    }
6839
6840  return regs;
6841}
6842
6843/* Used by function_arg and sparc_function_value_1 to implement the conventions
6844   for passing and returning BLKmode vectors.
6845   Return an expression valid as a return value for the FUNCTION_ARG
6846   and TARGET_FUNCTION_VALUE.
6847
6848   SIZE is the size in bytes of the vector.
6849   REGNO is the FP hard register the vector will be passed in.  */
6850
6851static rtx
6852function_arg_vector_value (int size, int regno)
6853{
6854  const int nregs = MAX (1, size / 8);
6855  rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6856
6857  if (size < 8)
6858    XVECEXP (regs, 0, 0)
6859      = gen_rtx_EXPR_LIST (VOIDmode,
6860			   gen_rtx_REG (SImode, regno),
6861			   const0_rtx);
6862  else
6863    for (int i = 0; i < nregs; i++)
6864      XVECEXP (regs, 0, i)
6865	= gen_rtx_EXPR_LIST (VOIDmode,
6866			     gen_rtx_REG (DImode, regno + 2*i),
6867			     GEN_INT (i*8));
6868
6869  return regs;
6870}
6871
6872/* Determine where to put an argument to a function.
6873   Value is zero to push the argument on the stack,
6874   or a hard register in which to store the argument.
6875
6876   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6877    the preceding args and about the function being called.
6878   MODE is the argument's machine mode.
6879   TYPE is the data type of the argument (as a tree).
6880    This is null for libcalls where that information may
6881    not be available.
6882   NAMED is true if this argument is a named parameter
6883    (otherwise it is an extra parameter matching an ellipsis).
6884   INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6885    TARGET_FUNCTION_INCOMING_ARG.  */
6886
6887static rtx
6888sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6889		      const_tree type, bool named, bool incoming_p)
6890{
6891  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6892
6893  int regbase = (incoming_p
6894		 ? SPARC_INCOMING_INT_ARG_FIRST
6895		 : SPARC_OUTGOING_INT_ARG_FIRST);
6896  int slotno, regno, padding;
6897  enum mode_class mclass = GET_MODE_CLASS (mode);
6898
6899  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6900				&regno, &padding);
6901  if (slotno == -1)
6902    return 0;
6903
6904  /* Vector types deserve special treatment because they are polymorphic wrt
6905     their mode, depending upon whether VIS instructions are enabled.  */
6906  if (type && TREE_CODE (type) == VECTOR_TYPE)
6907    {
6908      HOST_WIDE_INT size = int_size_in_bytes (type);
6909      gcc_assert ((TARGET_ARCH32 && size <= 8)
6910		  || (TARGET_ARCH64 && size <= 16));
6911
6912      if (mode == BLKmode)
6913	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6914
6915      mclass = MODE_FLOAT;
6916    }
6917
6918  if (TARGET_ARCH32)
6919    return gen_rtx_REG (mode, regno);
6920
6921  /* Structures up to 16 bytes in size are passed in arg slots on the stack
6922     and are promoted to registers if possible.  */
6923  if (type && TREE_CODE (type) == RECORD_TYPE)
6924    {
6925      HOST_WIDE_INT size = int_size_in_bytes (type);
6926      gcc_assert (size <= 16);
6927
6928      return function_arg_record_value (type, mode, slotno, named, regbase);
6929    }
6930
6931  /* Unions up to 16 bytes in size are passed in integer registers.  */
6932  else if (type && TREE_CODE (type) == UNION_TYPE)
6933    {
6934      HOST_WIDE_INT size = int_size_in_bytes (type);
6935      gcc_assert (size <= 16);
6936
6937      return function_arg_union_value (size, mode, slotno, regno);
6938    }
6939
6940  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6941     but also have the slot allocated for them.
6942     If no prototype is in scope fp values in register slots get passed
6943     in two places, either fp regs and int regs or fp regs and memory.  */
6944  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6945	   && SPARC_FP_REG_P (regno))
6946    {
6947      rtx reg = gen_rtx_REG (mode, regno);
6948      if (cum->prototype_p || cum->libcall_p)
6949	{
6950	  /* "* 2" because fp reg numbers are recorded in 4 byte
6951	     quantities.  */
6952#if 0
6953	  /* ??? This will cause the value to be passed in the fp reg and
6954	     in the stack.  When a prototype exists we want to pass the
6955	     value in the reg but reserve space on the stack.  That's an
6956	     optimization, and is deferred [for a bit].  */
6957	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6958	    return gen_rtx_PARALLEL (mode,
6959			    gen_rtvec (2,
6960				       gen_rtx_EXPR_LIST (VOIDmode,
6961						NULL_RTX, const0_rtx),
6962				       gen_rtx_EXPR_LIST (VOIDmode,
6963						reg, const0_rtx)));
6964	  else
6965#else
6966	  /* ??? It seems that passing back a register even when past
6967	     the area declared by REG_PARM_STACK_SPACE will allocate
6968	     space appropriately, and will not copy the data onto the
6969	     stack, exactly as we desire.
6970
6971	     This is due to locate_and_pad_parm being called in
6972	     expand_call whenever reg_parm_stack_space > 0, which
6973	     while beneficial to our example here, would seem to be
6974	     in error from what had been intended.  Ho hum...  -- r~ */
6975#endif
6976	    return reg;
6977	}
6978      else
6979	{
6980	  rtx v0, v1;
6981
6982	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6983	    {
6984	      int intreg;
6985
6986	      /* On incoming, we don't need to know that the value
6987		 is passed in %f0 and %i0, and it confuses other parts
6988		 causing needless spillage even on the simplest cases.  */
6989	      if (incoming_p)
6990		return reg;
6991
6992	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6993			+ (regno - SPARC_FP_ARG_FIRST) / 2);
6994
6995	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6996	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6997				      const0_rtx);
6998	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6999	    }
7000	  else
7001	    {
7002	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7003	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7004	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7005	    }
7006	}
7007    }
7008
7009  /* All other aggregate types are passed in an integer register in a mode
7010     corresponding to the size of the type.  */
7011  else if (type && AGGREGATE_TYPE_P (type))
7012    {
7013      HOST_WIDE_INT size = int_size_in_bytes (type);
7014      gcc_assert (size <= 16);
7015
7016      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7017    }
7018
7019  return gen_rtx_REG (mode, regno);
7020}
7021
7022/* Handle the TARGET_FUNCTION_ARG target hook.  */
7023
7024static rtx
7025sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7026		    const_tree type, bool named)
7027{
7028  return sparc_function_arg_1 (cum, mode, type, named, false);
7029}
7030
7031/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */
7032
7033static rtx
7034sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7035			     const_tree type, bool named)
7036{
7037  return sparc_function_arg_1 (cum, mode, type, named, true);
7038}
7039
7040/* For sparc64, objects requiring 16 byte alignment are passed that way.  */
7041
7042static unsigned int
7043sparc_function_arg_boundary (machine_mode mode, const_tree type)
7044{
7045  return ((TARGET_ARCH64
7046	   && (GET_MODE_ALIGNMENT (mode) == 128
7047	       || (type && TYPE_ALIGN (type) == 128)))
7048	  ? 128
7049	  : PARM_BOUNDARY);
7050}
7051
7052/* For an arg passed partly in registers and partly in memory,
7053   this is the number of bytes of registers used.
7054   For args passed entirely in registers or entirely in memory, zero.
7055
7056   Any arg that starts in the first 6 regs but won't entirely fit in them
7057   needs partial registers on v8.  On v9, structures with integer
7058   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7059   values that begin in the last fp reg [where "last fp reg" varies with the
7060   mode] will be split between that reg and memory.  */
7061
7062static int
7063sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7064			 tree type, bool named)
7065{
7066  int slotno, regno, padding;
7067
7068  /* We pass false for incoming_p here, it doesn't matter.  */
7069  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7070				false, &regno, &padding);
7071
7072  if (slotno == -1)
7073    return 0;
7074
7075  if (TARGET_ARCH32)
7076    {
7077      if ((slotno + (mode == BLKmode
7078		     ? ROUND_ADVANCE (int_size_in_bytes (type))
7079		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
7080	  > SPARC_INT_ARG_MAX)
7081	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7082    }
7083  else
7084    {
7085      /* We are guaranteed by pass_by_reference that the size of the
7086	 argument is not greater than 16 bytes, so we only need to return
7087	 one word if the argument is partially passed in registers.  */
7088
7089      if (type && AGGREGATE_TYPE_P (type))
7090	{
7091	  int size = int_size_in_bytes (type);
7092
7093	  if (size > UNITS_PER_WORD
7094	      && slotno == SPARC_INT_ARG_MAX - 1)
7095	    return UNITS_PER_WORD;
7096	}
7097      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7098	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7099		   && ! (TARGET_FPU && named)))
7100	{
7101	  /* The complex types are passed as packed types.  */
7102	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7103	      && slotno == SPARC_INT_ARG_MAX - 1)
7104	    return UNITS_PER_WORD;
7105	}
7106      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7107	{
7108	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7109	      > SPARC_FP_ARG_MAX)
7110	    return UNITS_PER_WORD;
7111	}
7112    }
7113
7114  return 0;
7115}
7116
7117/* Handle the TARGET_PASS_BY_REFERENCE target hook.
7118   Specify whether to pass the argument by reference.  */
7119
7120static bool
7121sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7122			 machine_mode mode, const_tree type,
7123			 bool named ATTRIBUTE_UNUSED)
7124{
7125  if (TARGET_ARCH32)
7126    /* Original SPARC 32-bit ABI says that structures and unions,
7127       and quad-precision floats are passed by reference.  For Pascal,
7128       also pass arrays by reference.  All other base types are passed
7129       in registers.
7130
7131       Extended ABI (as implemented by the Sun compiler) says that all
7132       complex floats are passed by reference.  Pass complex integers
7133       in registers up to 8 bytes.  More generally, enforce the 2-word
7134       cap for passing arguments in registers.
7135
7136       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7137       integers are passed like floats of the same size, that is in
7138       registers up to 8 bytes.  Pass all vector floats by reference
7139       like structure and unions.  */
7140    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7141	    || mode == SCmode
7142	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
7143	    || GET_MODE_SIZE (mode) > 8
7144	    || (type
7145		&& TREE_CODE (type) == VECTOR_TYPE
7146		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7147  else
7148    /* Original SPARC 64-bit ABI says that structures and unions
7149       smaller than 16 bytes are passed in registers, as well as
7150       all other base types.
7151
7152       Extended ABI (as implemented by the Sun compiler) says that
7153       complex floats are passed in registers up to 16 bytes.  Pass
7154       all complex integers in registers up to 16 bytes.  More generally,
7155       enforce the 2-word cap for passing arguments in registers.
7156
7157       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7158       integers are passed like floats of the same size, that is in
7159       registers (up to 16 bytes).  Pass all vector floats like structure
7160       and unions.  */
7161    return ((type
7162	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7163	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7164	    /* Catch CTImode and TCmode.  */
7165	    || GET_MODE_SIZE (mode) > 16);
7166}
7167
7168/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7169   Update the data in CUM to advance over an argument
7170   of mode MODE and data type TYPE.
7171   TYPE is null for libcalls where that information may not be available.  */
7172
7173static void
7174sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7175			    const_tree type, bool named)
7176{
7177  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7178  int regno, padding;
7179
7180  /* We pass false for incoming_p here, it doesn't matter.  */
7181  function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7182
7183  /* If argument requires leading padding, add it.  */
7184  cum->words += padding;
7185
7186  if (TARGET_ARCH32)
7187    {
7188      cum->words += (mode != BLKmode
7189		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7190		     : ROUND_ADVANCE (int_size_in_bytes (type)));
7191    }
7192  else
7193    {
7194      if (type && AGGREGATE_TYPE_P (type))
7195	{
7196	  int size = int_size_in_bytes (type);
7197
7198	  if (size <= 8)
7199	    ++cum->words;
7200	  else if (size <= 16)
7201	    cum->words += 2;
7202	  else /* passed by reference */
7203	    ++cum->words;
7204	}
7205      else
7206	{
7207	  cum->words += (mode != BLKmode
7208			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7209			 : ROUND_ADVANCE (int_size_in_bytes (type)));
7210	}
7211    }
7212}
7213
7214/* Handle the FUNCTION_ARG_PADDING macro.
7215   For the 64 bit ABI structs are always stored left shifted in their
7216   argument slot.  */
7217
7218enum direction
7219function_arg_padding (machine_mode mode, const_tree type)
7220{
7221  if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7222    return upward;
7223
7224  /* Fall back to the default.  */
7225  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7226}
7227
7228/* Handle the TARGET_RETURN_IN_MEMORY target hook.
7229   Specify whether to return the return value in memory.  */
7230
7231static bool
7232sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7233{
7234  if (TARGET_ARCH32)
7235    /* Original SPARC 32-bit ABI says that structures and unions,
7236       and quad-precision floats are returned in memory.  All other
7237       base types are returned in registers.
7238
7239       Extended ABI (as implemented by the Sun compiler) says that
7240       all complex floats are returned in registers (8 FP registers
7241       at most for '_Complex long double').  Return all complex integers
7242       in registers (4 at most for '_Complex long long').
7243
7244       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7245       integers are returned like floats of the same size, that is in
7246       registers up to 8 bytes and in memory otherwise.  Return all
7247       vector floats in memory like structure and unions; note that
7248       they always have BLKmode like the latter.  */
7249    return (TYPE_MODE (type) == BLKmode
7250	    || TYPE_MODE (type) == TFmode
7251	    || (TREE_CODE (type) == VECTOR_TYPE
7252		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7253  else
7254    /* Original SPARC 64-bit ABI says that structures and unions
7255       smaller than 32 bytes are returned in registers, as well as
7256       all other base types.
7257
7258       Extended ABI (as implemented by the Sun compiler) says that all
7259       complex floats are returned in registers (8 FP registers at most
7260       for '_Complex long double').  Return all complex integers in
7261       registers (4 at most for '_Complex TItype').
7262
7263       Vector ABI (as implemented by the Sun VIS SDK) says that vector
7264       integers are returned like floats of the same size, that is in
7265       registers.  Return all vector floats like structure and unions;
7266       note that they always have BLKmode like the latter.  */
7267    return (TYPE_MODE (type) == BLKmode
7268	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7269}
7270
7271/* Handle the TARGET_STRUCT_VALUE target hook.
7272   Return where to find the structure return value address.  */
7273
7274static rtx
7275sparc_struct_value_rtx (tree fndecl, int incoming)
7276{
7277  if (TARGET_ARCH64)
7278    return 0;
7279  else
7280    {
7281      rtx mem;
7282
7283      if (incoming)
7284	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7285						   STRUCT_VALUE_OFFSET));
7286      else
7287	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7288						   STRUCT_VALUE_OFFSET));
7289
7290      /* Only follow the SPARC ABI for fixed-size structure returns.
7291         Variable size structure returns are handled per the normal
7292         procedures in GCC. This is enabled by -mstd-struct-return */
7293      if (incoming == 2
7294	  && sparc_std_struct_return
7295	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7296	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7297	{
7298	  /* We must check and adjust the return address, as it is
7299	     optional as to whether the return object is really
7300	     provided.  */
7301	  rtx ret_reg = gen_rtx_REG (Pmode, 31);
7302	  rtx scratch = gen_reg_rtx (SImode);
7303	  rtx_code_label *endlab = gen_label_rtx ();
7304
7305	  /* Calculate the return object size */
7306	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7307	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7308	  /* Construct a temporary return value */
7309	  rtx temp_val
7310	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7311
7312	  /* Implement SPARC 32-bit psABI callee return struct checking:
7313
7314	     Fetch the instruction where we will return to and see if
7315	     it's an unimp instruction (the most significant 10 bits
7316	     will be zero).  */
7317	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
7318						plus_constant (Pmode,
7319							       ret_reg, 8)));
7320	  /* Assume the size is valid and pre-adjust */
7321	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7322	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7323				   0, endlab);
7324	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7325	  /* Write the address of the memory pointed to by temp_val into
7326	     the memory pointed to by mem */
7327	  emit_move_insn (mem, XEXP (temp_val, 0));
7328	  emit_label (endlab);
7329	}
7330
7331      return mem;
7332    }
7333}
7334
7335/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7336   For v9, function return values are subject to the same rules as arguments,
7337   except that up to 32 bytes may be returned in registers.  */
7338
7339static rtx
7340sparc_function_value_1 (const_tree type, machine_mode mode,
7341			bool outgoing)
7342{
7343  /* Beware that the two values are swapped here wrt function_arg.  */
7344  int regbase = (outgoing
7345		 ? SPARC_INCOMING_INT_ARG_FIRST
7346		 : SPARC_OUTGOING_INT_ARG_FIRST);
7347  enum mode_class mclass = GET_MODE_CLASS (mode);
7348  int regno;
7349
7350  /* Vector types deserve special treatment because they are polymorphic wrt
7351     their mode, depending upon whether VIS instructions are enabled.  */
7352  if (type && TREE_CODE (type) == VECTOR_TYPE)
7353    {
7354      HOST_WIDE_INT size = int_size_in_bytes (type);
7355      gcc_assert ((TARGET_ARCH32 && size <= 8)
7356		  || (TARGET_ARCH64 && size <= 32));
7357
7358      if (mode == BLKmode)
7359	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7360
7361      mclass = MODE_FLOAT;
7362    }
7363
7364  if (TARGET_ARCH64 && type)
7365    {
7366      /* Structures up to 32 bytes in size are returned in registers.  */
7367      if (TREE_CODE (type) == RECORD_TYPE)
7368	{
7369	  HOST_WIDE_INT size = int_size_in_bytes (type);
7370	  gcc_assert (size <= 32);
7371
7372	  return function_arg_record_value (type, mode, 0, 1, regbase);
7373	}
7374
7375      /* Unions up to 32 bytes in size are returned in integer registers.  */
7376      else if (TREE_CODE (type) == UNION_TYPE)
7377	{
7378	  HOST_WIDE_INT size = int_size_in_bytes (type);
7379	  gcc_assert (size <= 32);
7380
7381	  return function_arg_union_value (size, mode, 0, regbase);
7382	}
7383
7384      /* Objects that require it are returned in FP registers.  */
7385      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7386	;
7387
7388      /* All other aggregate types are returned in an integer register in a
7389	 mode corresponding to the size of the type.  */
7390      else if (AGGREGATE_TYPE_P (type))
7391	{
7392	  /* All other aggregate types are passed in an integer register
7393	     in a mode corresponding to the size of the type.  */
7394	  HOST_WIDE_INT size = int_size_in_bytes (type);
7395	  gcc_assert (size <= 32);
7396
7397	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7398
7399	  /* ??? We probably should have made the same ABI change in
7400	     3.4.0 as the one we made for unions.   The latter was
7401	     required by the SCD though, while the former is not
7402	     specified, so we favored compatibility and efficiency.
7403
7404	     Now we're stuck for aggregates larger than 16 bytes,
7405	     because OImode vanished in the meantime.  Let's not
7406	     try to be unduly clever, and simply follow the ABI
7407	     for unions in that case.  */
7408	  if (mode == BLKmode)
7409	    return function_arg_union_value (size, mode, 0, regbase);
7410	  else
7411	    mclass = MODE_INT;
7412	}
7413
7414      /* We should only have pointer and integer types at this point.  This
7415	 must match sparc_promote_function_mode.  */
7416      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7417	mode = word_mode;
7418    }
7419
7420  /* We should only have pointer and integer types at this point, except with
7421     -freg-struct-return.  This must match sparc_promote_function_mode.  */
7422  else if (TARGET_ARCH32
7423	   && !(type && AGGREGATE_TYPE_P (type))
7424	   && mclass == MODE_INT
7425	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7426    mode = word_mode;
7427
7428  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7429    regno = SPARC_FP_ARG_FIRST;
7430  else
7431    regno = regbase;
7432
7433  return gen_rtx_REG (mode, regno);
7434}
7435
7436/* Handle TARGET_FUNCTION_VALUE.
7437   On the SPARC, the value is found in the first "output" register, but the
7438   called function leaves it in the first "input" register.  */
7439
7440static rtx
7441sparc_function_value (const_tree valtype,
7442		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7443		      bool outgoing)
7444{
7445  return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7446}
7447
7448/* Handle TARGET_LIBCALL_VALUE.  */
7449
7450static rtx
7451sparc_libcall_value (machine_mode mode,
7452		     const_rtx fun ATTRIBUTE_UNUSED)
7453{
7454  return sparc_function_value_1 (NULL_TREE, mode, false);
7455}
7456
7457/* Handle FUNCTION_VALUE_REGNO_P.
7458   On the SPARC, the first "output" reg is used for integer values, and the
7459   first floating point register is used for floating point values.  */
7460
7461static bool
7462sparc_function_value_regno_p (const unsigned int regno)
7463{
7464  return (regno == 8 || (TARGET_FPU && regno == 32));
7465}
7466
7467/* Do what is necessary for `va_start'.  We look at the current function
7468   to determine if stdarg or varargs is used and return the address of
7469   the first unnamed parameter.  */
7470
7471static rtx
7472sparc_builtin_saveregs (void)
7473{
7474  int first_reg = crtl->args.info.words;
7475  rtx address;
7476  int regno;
7477
7478  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7479    emit_move_insn (gen_rtx_MEM (word_mode,
7480				 gen_rtx_PLUS (Pmode,
7481					       frame_pointer_rtx,
7482					       GEN_INT (FIRST_PARM_OFFSET (0)
7483							+ (UNITS_PER_WORD
7484							   * regno)))),
7485		    gen_rtx_REG (word_mode,
7486				 SPARC_INCOMING_INT_ARG_FIRST + regno));
7487
7488  address = gen_rtx_PLUS (Pmode,
7489			  frame_pointer_rtx,
7490			  GEN_INT (FIRST_PARM_OFFSET (0)
7491				   + UNITS_PER_WORD * first_reg));
7492
7493  return address;
7494}
7495
7496/* Implement `va_start' for stdarg.  */
7497
7498static void
7499sparc_va_start (tree valist, rtx nextarg)
7500{
7501  nextarg = expand_builtin_saveregs ();
7502  std_expand_builtin_va_start (valist, nextarg);
7503}
7504
7505/* Implement `va_arg' for stdarg.  */
7506
7507static tree
7508sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7509		       gimple_seq *post_p)
7510{
7511  HOST_WIDE_INT size, rsize, align;
7512  tree addr, incr;
7513  bool indirect;
7514  tree ptrtype = build_pointer_type (type);
7515
7516  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7517    {
7518      indirect = true;
7519      size = rsize = UNITS_PER_WORD;
7520      align = 0;
7521    }
7522  else
7523    {
7524      indirect = false;
7525      size = int_size_in_bytes (type);
7526      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7527      align = 0;
7528
7529      if (TARGET_ARCH64)
7530	{
7531	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
7532	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7533	    align = 2 * UNITS_PER_WORD;
7534
7535	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
7536	     are left-justified in their slots.  */
7537	  if (AGGREGATE_TYPE_P (type))
7538	    {
7539	      if (size == 0)
7540		size = rsize = UNITS_PER_WORD;
7541	      else
7542		size = rsize;
7543	    }
7544	}
7545    }
7546
7547  incr = valist;
7548  if (align)
7549    {
7550      incr = fold_build_pointer_plus_hwi (incr, align - 1);
7551      incr = fold_convert (sizetype, incr);
7552      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7553			  size_int (-align));
7554      incr = fold_convert (ptr_type_node, incr);
7555    }
7556
7557  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7558  addr = incr;
7559
7560  if (BYTES_BIG_ENDIAN && size < rsize)
7561    addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7562
7563  if (indirect)
7564    {
7565      addr = fold_convert (build_pointer_type (ptrtype), addr);
7566      addr = build_va_arg_indirect_ref (addr);
7567    }
7568
7569  /* If the address isn't aligned properly for the type, we need a temporary.
7570     FIXME: This is inefficient, usually we can do this in registers.  */
7571  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7572    {
7573      tree tmp = create_tmp_var (type, "va_arg_tmp");
7574      tree dest_addr = build_fold_addr_expr (tmp);
7575      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7576				   3, dest_addr, addr, size_int (rsize));
7577      TREE_ADDRESSABLE (tmp) = 1;
7578      gimplify_and_add (copy, pre_p);
7579      addr = dest_addr;
7580    }
7581
7582  else
7583    addr = fold_convert (ptrtype, addr);
7584
7585  incr = fold_build_pointer_plus_hwi (incr, rsize);
7586  gimplify_assign (valist, incr, post_p);
7587
7588  return build_va_arg_indirect_ref (addr);
7589}
7590
7591/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7592   Specify whether the vector mode is supported by the hardware.  */
7593
7594static bool
7595sparc_vector_mode_supported_p (machine_mode mode)
7596{
7597  return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7598}
7599
7600/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.  */
7601
7602static machine_mode
7603sparc_preferred_simd_mode (machine_mode mode)
7604{
7605  if (TARGET_VIS)
7606    switch (mode)
7607      {
7608      case SImode:
7609	return V2SImode;
7610      case HImode:
7611	return V4HImode;
7612      case QImode:
7613	return V8QImode;
7614
7615      default:;
7616      }
7617
7618  return word_mode;
7619}
7620
7621/* Return the string to output an unconditional branch to LABEL, which is
7622   the operand number of the label.
7623
7624   DEST is the destination insn (i.e. the label), INSN is the source.  */
7625
7626const char *
7627output_ubranch (rtx dest, rtx_insn *insn)
7628{
7629  static char string[64];
7630  bool v9_form = false;
7631  int delta;
7632  char *p;
7633
7634  /* Even if we are trying to use cbcond for this, evaluate
7635     whether we can use V9 branches as our backup plan.  */
7636
7637  delta = 5000000;
7638  if (INSN_ADDRESSES_SET_P ())
7639    delta = (INSN_ADDRESSES (INSN_UID (dest))
7640	     - INSN_ADDRESSES (INSN_UID (insn)));
7641
7642  /* Leave some instructions for "slop".  */
7643  if (TARGET_V9 && delta >= -260000 && delta < 260000)
7644    v9_form = true;
7645
7646  if (TARGET_CBCOND)
7647    {
7648      bool emit_nop = emit_cbcond_nop (insn);
7649      bool far = false;
7650      const char *rval;
7651
7652      if (delta < -500 || delta > 500)
7653	far = true;
7654
7655      if (far)
7656	{
7657	  if (v9_form)
7658	    rval = "ba,a,pt\t%%xcc, %l0";
7659	  else
7660	    rval = "b,a\t%l0";
7661	}
7662      else
7663	{
7664	  if (emit_nop)
7665	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7666	  else
7667	    rval = "cwbe\t%%g0, %%g0, %l0";
7668	}
7669      return rval;
7670    }
7671
7672  if (v9_form)
7673    strcpy (string, "ba%*,pt\t%%xcc, ");
7674  else
7675    strcpy (string, "b%*\t");
7676
7677  p = strchr (string, '\0');
7678  *p++ = '%';
7679  *p++ = 'l';
7680  *p++ = '0';
7681  *p++ = '%';
7682  *p++ = '(';
7683  *p = '\0';
7684
7685  return string;
7686}
7687
7688/* Return the string to output a conditional branch to LABEL, which is
7689   the operand number of the label.  OP is the conditional expression.
7690   XEXP (OP, 0) is assumed to be a condition code register (integer or
7691   floating point) and its mode specifies what kind of comparison we made.
7692
7693   DEST is the destination insn (i.e. the label), INSN is the source.
7694
7695   REVERSED is nonzero if we should reverse the sense of the comparison.
7696
7697   ANNUL is nonzero if we should generate an annulling branch.  */
7698
7699const char *
7700output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7701		rtx_insn *insn)
7702{
7703  static char string[64];
7704  enum rtx_code code = GET_CODE (op);
7705  rtx cc_reg = XEXP (op, 0);
7706  machine_mode mode = GET_MODE (cc_reg);
7707  const char *labelno, *branch;
7708  int spaces = 8, far;
7709  char *p;
7710
7711  /* v9 branches are limited to +-1MB.  If it is too far away,
7712     change
7713
7714     bne,pt %xcc, .LC30
7715
7716     to
7717
7718     be,pn %xcc, .+12
7719      nop
7720     ba .LC30
7721
7722     and
7723
7724     fbne,a,pn %fcc2, .LC29
7725
7726     to
7727
7728     fbe,pt %fcc2, .+16
7729      nop
7730     ba .LC29  */
7731
7732  far = TARGET_V9 && (get_attr_length (insn) >= 3);
7733  if (reversed ^ far)
7734    {
7735      /* Reversal of FP compares takes care -- an ordered compare
7736	 becomes an unordered compare and vice versa.  */
7737      if (mode == CCFPmode || mode == CCFPEmode)
7738	code = reverse_condition_maybe_unordered (code);
7739      else
7740	code = reverse_condition (code);
7741    }
7742
7743  /* Start by writing the branch condition.  */
7744  if (mode == CCFPmode || mode == CCFPEmode)
7745    {
7746      switch (code)
7747	{
7748	case NE:
7749	  branch = "fbne";
7750	  break;
7751	case EQ:
7752	  branch = "fbe";
7753	  break;
7754	case GE:
7755	  branch = "fbge";
7756	  break;
7757	case GT:
7758	  branch = "fbg";
7759	  break;
7760	case LE:
7761	  branch = "fble";
7762	  break;
7763	case LT:
7764	  branch = "fbl";
7765	  break;
7766	case UNORDERED:
7767	  branch = "fbu";
7768	  break;
7769	case ORDERED:
7770	  branch = "fbo";
7771	  break;
7772	case UNGT:
7773	  branch = "fbug";
7774	  break;
7775	case UNLT:
7776	  branch = "fbul";
7777	  break;
7778	case UNEQ:
7779	  branch = "fbue";
7780	  break;
7781	case UNGE:
7782	  branch = "fbuge";
7783	  break;
7784	case UNLE:
7785	  branch = "fbule";
7786	  break;
7787	case LTGT:
7788	  branch = "fblg";
7789	  break;
7790
7791	default:
7792	  gcc_unreachable ();
7793	}
7794
7795      /* ??? !v9: FP branches cannot be preceded by another floating point
7796	 insn.  Because there is currently no concept of pre-delay slots,
7797	 we can fix this only by always emitting a nop before a floating
7798	 point branch.  */
7799
7800      string[0] = '\0';
7801      if (! TARGET_V9)
7802	strcpy (string, "nop\n\t");
7803      strcat (string, branch);
7804    }
7805  else
7806    {
7807      switch (code)
7808	{
7809	case NE:
7810	  branch = "bne";
7811	  break;
7812	case EQ:
7813	  branch = "be";
7814	  break;
7815	case GE:
7816	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7817	    branch = "bpos";
7818	  else
7819	    branch = "bge";
7820	  break;
7821	case GT:
7822	  branch = "bg";
7823	  break;
7824	case LE:
7825	  branch = "ble";
7826	  break;
7827	case LT:
7828	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7829	    branch = "bneg";
7830	  else
7831	    branch = "bl";
7832	  break;
7833	case GEU:
7834	  branch = "bgeu";
7835	  break;
7836	case GTU:
7837	  branch = "bgu";
7838	  break;
7839	case LEU:
7840	  branch = "bleu";
7841	  break;
7842	case LTU:
7843	  branch = "blu";
7844	  break;
7845
7846	default:
7847	  gcc_unreachable ();
7848	}
7849      strcpy (string, branch);
7850    }
7851  spaces -= strlen (branch);
7852  p = strchr (string, '\0');
7853
7854  /* Now add the annulling, the label, and a possible noop.  */
7855  if (annul && ! far)
7856    {
7857      strcpy (p, ",a");
7858      p += 2;
7859      spaces -= 2;
7860    }
7861
7862  if (TARGET_V9)
7863    {
7864      rtx note;
7865      int v8 = 0;
7866
7867      if (! far && insn && INSN_ADDRESSES_SET_P ())
7868	{
7869	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
7870		       - INSN_ADDRESSES (INSN_UID (insn)));
7871	  /* Leave some instructions for "slop".  */
7872	  if (delta < -260000 || delta >= 260000)
7873	    v8 = 1;
7874	}
7875
7876      if (mode == CCFPmode || mode == CCFPEmode)
7877	{
7878	  static char v9_fcc_labelno[] = "%%fccX, ";
7879	  /* Set the char indicating the number of the fcc reg to use.  */
7880	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7881	  labelno = v9_fcc_labelno;
7882	  if (v8)
7883	    {
7884	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7885	      labelno = "";
7886	    }
7887	}
7888      else if (mode == CCXmode || mode == CCX_NOOVmode)
7889	{
7890	  labelno = "%%xcc, ";
7891	  gcc_assert (! v8);
7892	}
7893      else
7894	{
7895	  labelno = "%%icc, ";
7896	  if (v8)
7897	    labelno = "";
7898	}
7899
7900      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7901	{
7902	  strcpy (p,
7903		  ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7904		  ? ",pt" : ",pn");
7905	  p += 3;
7906	  spaces -= 3;
7907	}
7908    }
7909  else
7910    labelno = "";
7911
7912  if (spaces > 0)
7913    *p++ = '\t';
7914  else
7915    *p++ = ' ';
7916  strcpy (p, labelno);
7917  p = strchr (p, '\0');
7918  if (far)
7919    {
7920      strcpy (p, ".+12\n\t nop\n\tb\t");
7921      /* Skip the next insn if requested or
7922	 if we know that it will be a nop.  */
7923      if (annul || ! final_sequence)
7924        p[3] = '6';
7925      p += 14;
7926    }
7927  *p++ = '%';
7928  *p++ = 'l';
7929  *p++ = label + '0';
7930  *p++ = '%';
7931  *p++ = '#';
7932  *p = '\0';
7933
7934  return string;
7935}
7936
7937/* Emit a library call comparison between floating point X and Y.
7938   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7939   Return the new operator to be used in the comparison sequence.
7940
7941   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7942   values as arguments instead of the TFmode registers themselves,
7943   that's why we cannot call emit_float_lib_cmp.  */
7944
7945rtx
7946sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7947{
7948  const char *qpfunc;
7949  rtx slot0, slot1, result, tem, tem2, libfunc;
7950  machine_mode mode;
7951  enum rtx_code new_comparison;
7952
7953  switch (comparison)
7954    {
7955    case EQ:
7956      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7957      break;
7958
7959    case NE:
7960      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7961      break;
7962
7963    case GT:
7964      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7965      break;
7966
7967    case GE:
7968      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7969      break;
7970
7971    case LT:
7972      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7973      break;
7974
7975    case LE:
7976      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7977      break;
7978
7979    case ORDERED:
7980    case UNORDERED:
7981    case UNGT:
7982    case UNLT:
7983    case UNEQ:
7984    case UNGE:
7985    case UNLE:
7986    case LTGT:
7987      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7988      break;
7989
7990    default:
7991      gcc_unreachable ();
7992    }
7993
7994  if (TARGET_ARCH64)
7995    {
7996      if (MEM_P (x))
7997	{
7998	  tree expr = MEM_EXPR (x);
7999	  if (expr)
8000	    mark_addressable (expr);
8001	  slot0 = x;
8002	}
8003      else
8004	{
8005	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8006	  emit_move_insn (slot0, x);
8007	}
8008
8009      if (MEM_P (y))
8010	{
8011	  tree expr = MEM_EXPR (y);
8012	  if (expr)
8013	    mark_addressable (expr);
8014	  slot1 = y;
8015	}
8016      else
8017	{
8018	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8019	  emit_move_insn (slot1, y);
8020	}
8021
8022      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8023      emit_library_call (libfunc, LCT_NORMAL,
8024			 DImode, 2,
8025			 XEXP (slot0, 0), Pmode,
8026			 XEXP (slot1, 0), Pmode);
8027      mode = DImode;
8028    }
8029  else
8030    {
8031      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8032      emit_library_call (libfunc, LCT_NORMAL,
8033			 SImode, 2,
8034			 x, TFmode, y, TFmode);
8035      mode = SImode;
8036    }
8037
8038
8039  /* Immediately move the result of the libcall into a pseudo
8040     register so reload doesn't clobber the value if it needs
8041     the return register for a spill reg.  */
8042  result = gen_reg_rtx (mode);
8043  emit_move_insn (result, hard_libcall_value (mode, libfunc));
8044
8045  switch (comparison)
8046    {
8047    default:
8048      return gen_rtx_NE (VOIDmode, result, const0_rtx);
8049    case ORDERED:
8050    case UNORDERED:
8051      new_comparison = (comparison == UNORDERED ? EQ : NE);
8052      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8053    case UNGT:
8054    case UNGE:
8055      new_comparison = (comparison == UNGT ? GT : NE);
8056      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8057    case UNLE:
8058      return gen_rtx_NE (VOIDmode, result, const2_rtx);
8059    case UNLT:
8060      tem = gen_reg_rtx (mode);
8061      if (TARGET_ARCH32)
8062	emit_insn (gen_andsi3 (tem, result, const1_rtx));
8063      else
8064	emit_insn (gen_anddi3 (tem, result, const1_rtx));
8065      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8066    case UNEQ:
8067    case LTGT:
8068      tem = gen_reg_rtx (mode);
8069      if (TARGET_ARCH32)
8070	emit_insn (gen_addsi3 (tem, result, const1_rtx));
8071      else
8072	emit_insn (gen_adddi3 (tem, result, const1_rtx));
8073      tem2 = gen_reg_rtx (mode);
8074      if (TARGET_ARCH32)
8075	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8076      else
8077	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8078      new_comparison = (comparison == UNEQ ? EQ : NE);
8079      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8080    }
8081
8082  gcc_unreachable ();
8083}
8084
8085/* Generate an unsigned DImode to FP conversion.  This is the same code
8086   optabs would emit if we didn't have TFmode patterns.  */
8087
8088void
8089sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8090{
8091  rtx i0, i1, f0, in, out;
8092
8093  out = operands[0];
8094  in = force_reg (DImode, operands[1]);
8095  rtx_code_label *neglab = gen_label_rtx ();
8096  rtx_code_label *donelab = gen_label_rtx ();
8097  i0 = gen_reg_rtx (DImode);
8098  i1 = gen_reg_rtx (DImode);
8099  f0 = gen_reg_rtx (mode);
8100
8101  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8102
8103  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
8104  emit_jump_insn (gen_jump (donelab));
8105  emit_barrier ();
8106
8107  emit_label (neglab);
8108
8109  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8110  emit_insn (gen_anddi3 (i1, in, const1_rtx));
8111  emit_insn (gen_iordi3 (i0, i0, i1));
8112  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
8113  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
8114
8115  emit_label (donelab);
8116}
8117
8118/* Generate an FP to unsigned DImode conversion.  This is the same code
8119   optabs would emit if we didn't have TFmode patterns.  */
8120
8121void
8122sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8123{
8124  rtx i0, i1, f0, in, out, limit;
8125
8126  out = operands[0];
8127  in = force_reg (mode, operands[1]);
8128  rtx_code_label *neglab = gen_label_rtx ();
8129  rtx_code_label *donelab = gen_label_rtx ();
8130  i0 = gen_reg_rtx (DImode);
8131  i1 = gen_reg_rtx (DImode);
8132  limit = gen_reg_rtx (mode);
8133  f0 = gen_reg_rtx (mode);
8134
8135  emit_move_insn (limit,
8136		  CONST_DOUBLE_FROM_REAL_VALUE (
8137		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8138  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8139
8140  emit_insn (gen_rtx_SET (VOIDmode,
8141			  out,
8142			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8143  emit_jump_insn (gen_jump (donelab));
8144  emit_barrier ();
8145
8146  emit_label (neglab);
8147
8148  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
8149  emit_insn (gen_rtx_SET (VOIDmode,
8150			  i0,
8151			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8152  emit_insn (gen_movdi (i1, const1_rtx));
8153  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8154  emit_insn (gen_xordi3 (out, i0, i1));
8155
8156  emit_label (donelab);
8157}
8158
8159/* Return the string to output a compare and branch instruction to DEST.
8160   DEST is the destination insn (i.e. the label), INSN is the source,
8161   and OP is the conditional expression.  */
8162
8163const char *
8164output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8165{
8166  machine_mode mode = GET_MODE (XEXP (op, 0));
8167  enum rtx_code code = GET_CODE (op);
8168  const char *cond_str, *tmpl;
8169  int far, emit_nop, len;
8170  static char string[64];
8171  char size_char;
8172
8173  /* Compare and Branch is limited to +-2KB.  If it is too far away,
8174     change
8175
8176     cxbne X, Y, .LC30
8177
8178     to
8179
8180     cxbe X, Y, .+16
8181     nop
8182     ba,pt xcc, .LC30
8183      nop  */
8184
8185  len = get_attr_length (insn);
8186
8187  far = len == 4;
8188  emit_nop = len == 2;
8189
8190  if (far)
8191    code = reverse_condition (code);
8192
8193  size_char = ((mode == SImode) ? 'w' : 'x');
8194
8195  switch (code)
8196    {
8197    case NE:
8198      cond_str = "ne";
8199      break;
8200
8201    case EQ:
8202      cond_str = "e";
8203      break;
8204
8205    case GE:
8206      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8207	cond_str = "pos";
8208      else
8209	cond_str = "ge";
8210      break;
8211
8212    case GT:
8213      cond_str = "g";
8214      break;
8215
8216    case LE:
8217      cond_str = "le";
8218      break;
8219
8220    case LT:
8221      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8222	cond_str = "neg";
8223      else
8224	cond_str = "l";
8225      break;
8226
8227    case GEU:
8228      cond_str = "cc";
8229      break;
8230
8231    case GTU:
8232      cond_str = "gu";
8233      break;
8234
8235    case LEU:
8236      cond_str = "leu";
8237      break;
8238
8239    case LTU:
8240      cond_str = "cs";
8241      break;
8242
8243    default:
8244      gcc_unreachable ();
8245    }
8246
8247  if (far)
8248    {
8249      int veryfar = 1, delta;
8250
8251      if (INSN_ADDRESSES_SET_P ())
8252	{
8253	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8254		   - INSN_ADDRESSES (INSN_UID (insn)));
8255	  /* Leave some instructions for "slop".  */
8256	  if (delta >= -260000 && delta < 260000)
8257	    veryfar = 0;
8258	}
8259
8260      if (veryfar)
8261	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8262      else
8263	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8264    }
8265  else
8266    {
8267      if (emit_nop)
8268	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8269      else
8270	tmpl = "c%cb%s\t%%1, %%2, %%3";
8271    }
8272
8273  snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8274
8275  return string;
8276}
8277
8278/* Return the string to output a conditional branch to LABEL, testing
8279   register REG.  LABEL is the operand number of the label; REG is the
8280   operand number of the reg.  OP is the conditional expression.  The mode
8281   of REG says what kind of comparison we made.
8282
8283   DEST is the destination insn (i.e. the label), INSN is the source.
8284
8285   REVERSED is nonzero if we should reverse the sense of the comparison.
8286
8287   ANNUL is nonzero if we should generate an annulling branch.  */
8288
8289const char *
8290output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8291		 int annul, rtx_insn *insn)
8292{
8293  static char string[64];
8294  enum rtx_code code = GET_CODE (op);
8295  machine_mode mode = GET_MODE (XEXP (op, 0));
8296  rtx note;
8297  int far;
8298  char *p;
8299
8300  /* branch on register are limited to +-128KB.  If it is too far away,
8301     change
8302
8303     brnz,pt %g1, .LC30
8304
8305     to
8306
8307     brz,pn %g1, .+12
8308      nop
8309     ba,pt %xcc, .LC30
8310
8311     and
8312
8313     brgez,a,pn %o1, .LC29
8314
8315     to
8316
8317     brlz,pt %o1, .+16
8318      nop
8319     ba,pt %xcc, .LC29  */
8320
8321  far = get_attr_length (insn) >= 3;
8322
8323  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
8324  if (reversed ^ far)
8325    code = reverse_condition (code);
8326
8327  /* Only 64 bit versions of these instructions exist.  */
8328  gcc_assert (mode == DImode);
8329
8330  /* Start by writing the branch condition.  */
8331
8332  switch (code)
8333    {
8334    case NE:
8335      strcpy (string, "brnz");
8336      break;
8337
8338    case EQ:
8339      strcpy (string, "brz");
8340      break;
8341
8342    case GE:
8343      strcpy (string, "brgez");
8344      break;
8345
8346    case LT:
8347      strcpy (string, "brlz");
8348      break;
8349
8350    case LE:
8351      strcpy (string, "brlez");
8352      break;
8353
8354    case GT:
8355      strcpy (string, "brgz");
8356      break;
8357
8358    default:
8359      gcc_unreachable ();
8360    }
8361
8362  p = strchr (string, '\0');
8363
8364  /* Now add the annulling, reg, label, and nop.  */
8365  if (annul && ! far)
8366    {
8367      strcpy (p, ",a");
8368      p += 2;
8369    }
8370
8371  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8372    {
8373      strcpy (p,
8374	      ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8375	      ? ",pt" : ",pn");
8376      p += 3;
8377    }
8378
8379  *p = p < string + 8 ? '\t' : ' ';
8380  p++;
8381  *p++ = '%';
8382  *p++ = '0' + reg;
8383  *p++ = ',';
8384  *p++ = ' ';
8385  if (far)
8386    {
8387      int veryfar = 1, delta;
8388
8389      if (INSN_ADDRESSES_SET_P ())
8390	{
8391	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8392		   - INSN_ADDRESSES (INSN_UID (insn)));
8393	  /* Leave some instructions for "slop".  */
8394	  if (delta >= -260000 && delta < 260000)
8395	    veryfar = 0;
8396	}
8397
8398      strcpy (p, ".+12\n\t nop\n\t");
8399      /* Skip the next insn if requested or
8400	 if we know that it will be a nop.  */
8401      if (annul || ! final_sequence)
8402        p[3] = '6';
8403      p += 12;
8404      if (veryfar)
8405	{
8406	  strcpy (p, "b\t");
8407	  p += 2;
8408	}
8409      else
8410	{
8411	  strcpy (p, "ba,pt\t%%xcc, ");
8412	  p += 13;
8413	}
8414    }
8415  *p++ = '%';
8416  *p++ = 'l';
8417  *p++ = '0' + label;
8418  *p++ = '%';
8419  *p++ = '#';
8420  *p = '\0';
8421
8422  return string;
8423}
8424
8425/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8426   Such instructions cannot be used in the delay slot of return insn on v9.
8427   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8428 */
8429
8430static int
8431epilogue_renumber (register rtx *where, int test)
8432{
8433  register const char *fmt;
8434  register int i;
8435  register enum rtx_code code;
8436
8437  if (*where == 0)
8438    return 0;
8439
8440  code = GET_CODE (*where);
8441
8442  switch (code)
8443    {
8444    case REG:
8445      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
8446	return 1;
8447      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8448	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8449    case SCRATCH:
8450    case CC0:
8451    case PC:
8452    case CONST_INT:
8453    case CONST_DOUBLE:
8454      return 0;
8455
8456      /* Do not replace the frame pointer with the stack pointer because
8457	 it can cause the delayed instruction to load below the stack.
8458	 This occurs when instructions like:
8459
8460	 (set (reg/i:SI 24 %i0)
8461	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8462                       (const_int -20 [0xffffffec])) 0))
8463
8464	 are in the return delayed slot.  */
8465    case PLUS:
8466      if (GET_CODE (XEXP (*where, 0)) == REG
8467	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8468	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8469	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8470	return 1;
8471      break;
8472
8473    case MEM:
8474      if (SPARC_STACK_BIAS
8475	  && GET_CODE (XEXP (*where, 0)) == REG
8476	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8477	return 1;
8478      break;
8479
8480    default:
8481      break;
8482    }
8483
8484  fmt = GET_RTX_FORMAT (code);
8485
8486  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8487    {
8488      if (fmt[i] == 'E')
8489	{
8490	  register int j;
8491	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8492	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8493	      return 1;
8494	}
8495      else if (fmt[i] == 'e'
8496	       && epilogue_renumber (&(XEXP (*where, i)), test))
8497	return 1;
8498    }
8499  return 0;
8500}
8501
8502/* Leaf functions and non-leaf functions have different needs.  */
8503
8504static const int
8505reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8506
8507static const int
8508reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8509
8510static const int *const reg_alloc_orders[] = {
8511  reg_leaf_alloc_order,
8512  reg_nonleaf_alloc_order};
8513
8514void
8515order_regs_for_local_alloc (void)
8516{
8517  static int last_order_nonleaf = 1;
8518
8519  if (df_regs_ever_live_p (15) != last_order_nonleaf)
8520    {
8521      last_order_nonleaf = !last_order_nonleaf;
8522      memcpy ((char *) reg_alloc_order,
8523	      (const char *) reg_alloc_orders[last_order_nonleaf],
8524	      FIRST_PSEUDO_REGISTER * sizeof (int));
8525    }
8526}
8527
8528/* Return 1 if REG and MEM are legitimate enough to allow the various
8529   mem<-->reg splits to be run.  */
8530
8531int
8532sparc_splitdi_legitimate (rtx reg, rtx mem)
8533{
8534  /* Punt if we are here by mistake.  */
8535  gcc_assert (reload_completed);
8536
8537  /* We must have an offsettable memory reference.  */
8538  if (! offsettable_memref_p (mem))
8539    return 0;
8540
8541  /* If we have legitimate args for ldd/std, we do not want
8542     the split to happen.  */
8543  if ((REGNO (reg) % 2) == 0
8544      && mem_min_alignment (mem, 8))
8545    return 0;
8546
8547  /* Success.  */
8548  return 1;
8549}
8550
8551/* Like sparc_splitdi_legitimate but for REG <--> REG moves.  */
8552
8553int
8554sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8555{
8556  int regno1, regno2;
8557
8558  if (GET_CODE (reg1) == SUBREG)
8559    reg1 = SUBREG_REG (reg1);
8560  if (GET_CODE (reg1) != REG)
8561    return 0;
8562  regno1 = REGNO (reg1);
8563
8564  if (GET_CODE (reg2) == SUBREG)
8565    reg2 = SUBREG_REG (reg2);
8566  if (GET_CODE (reg2) != REG)
8567    return 0;
8568  regno2 = REGNO (reg2);
8569
8570  if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8571    return 1;
8572
8573  if (TARGET_VIS3)
8574    {
8575      if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8576	  || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8577	return 1;
8578    }
8579
8580  return 0;
8581}
8582
8583/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8584   This makes them candidates for using ldd and std insns.
8585
8586   Note reg1 and reg2 *must* be hard registers.  */
8587
8588int
8589registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8590{
8591  /* We might have been passed a SUBREG.  */
8592  if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8593    return 0;
8594
8595  if (REGNO (reg1) % 2 != 0)
8596    return 0;
8597
8598  /* Integer ldd is deprecated in SPARC V9 */
8599  if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8600    return 0;
8601
8602  return (REGNO (reg1) == REGNO (reg2) - 1);
8603}
8604
8605/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8606   an ldd or std insn.
8607
8608   This can only happen when addr1 and addr2, the addresses in mem1
8609   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8610   addr1 must also be aligned on a 64-bit boundary.
8611
8612   Also iff dependent_reg_rtx is not null it should not be used to
8613   compute the address for mem1, i.e. we cannot optimize a sequence
8614   like:
8615   	ld [%o0], %o0
8616	ld [%o0 + 4], %o1
8617   to
8618   	ldd [%o0], %o0
8619   nor:
8620	ld [%g3 + 4], %g3
8621	ld [%g3], %g2
8622   to
8623        ldd [%g3], %g2
8624
8625   But, note that the transformation from:
8626	ld [%g2 + 4], %g3
8627        ld [%g2], %g2
8628   to
8629	ldd [%g2], %g2
8630   is perfectly fine.  Thus, the peephole2 patterns always pass us
8631   the destination register of the first load, never the second one.
8632
8633   For stores we don't have a similar problem, so dependent_reg_rtx is
8634   NULL_RTX.  */
8635
8636int
8637mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8638{
8639  rtx addr1, addr2;
8640  unsigned int reg1;
8641  HOST_WIDE_INT offset1;
8642
8643  /* The mems cannot be volatile.  */
8644  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8645    return 0;
8646
8647  /* MEM1 should be aligned on a 64-bit boundary.  */
8648  if (MEM_ALIGN (mem1) < 64)
8649    return 0;
8650
8651  addr1 = XEXP (mem1, 0);
8652  addr2 = XEXP (mem2, 0);
8653
8654  /* Extract a register number and offset (if used) from the first addr.  */
8655  if (GET_CODE (addr1) == PLUS)
8656    {
8657      /* If not a REG, return zero.  */
8658      if (GET_CODE (XEXP (addr1, 0)) != REG)
8659	return 0;
8660      else
8661	{
8662          reg1 = REGNO (XEXP (addr1, 0));
8663	  /* The offset must be constant!  */
8664	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8665            return 0;
8666          offset1 = INTVAL (XEXP (addr1, 1));
8667	}
8668    }
8669  else if (GET_CODE (addr1) != REG)
8670    return 0;
8671  else
8672    {
8673      reg1 = REGNO (addr1);
8674      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
8675      offset1 = 0;
8676    }
8677
8678  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
8679  if (GET_CODE (addr2) != PLUS)
8680    return 0;
8681
8682  if (GET_CODE (XEXP (addr2, 0)) != REG
8683      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8684    return 0;
8685
8686  if (reg1 != REGNO (XEXP (addr2, 0)))
8687    return 0;
8688
8689  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8690    return 0;
8691
8692  /* The first offset must be evenly divisible by 8 to ensure the
8693     address is 64 bit aligned.  */
8694  if (offset1 % 8 != 0)
8695    return 0;
8696
8697  /* The offset for the second addr must be 4 more than the first addr.  */
8698  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8699    return 0;
8700
8701  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
8702     instructions.  */
8703  return 1;
8704}
8705
8706/* Return the widened memory access made of MEM1 and MEM2 in MODE.  */
8707
8708rtx
8709widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8710{
8711  rtx x = widen_memory_access (mem1, mode, 0);
8712  MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8713  return x;
8714}
8715
8716/* Return 1 if reg is a pseudo, or is the first register in
8717   a hard register pair.  This makes it suitable for use in
8718   ldd and std insns.  */
8719
8720int
8721register_ok_for_ldd (rtx reg)
8722{
8723  /* We might have been passed a SUBREG.  */
8724  if (!REG_P (reg))
8725    return 0;
8726
8727  if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8728    return (REGNO (reg) % 2 == 0);
8729
8730  return 1;
8731}
8732
8733/* Return 1 if OP, a MEM, has an address which is known to be
8734   aligned to an 8-byte boundary.  */
8735
8736int
8737memory_ok_for_ldd (rtx op)
8738{
8739  /* In 64-bit mode, we assume that the address is word-aligned.  */
8740  if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8741    return 0;
8742
8743  if (! can_create_pseudo_p ()
8744      && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8745    return 0;
8746
8747  return 1;
8748}
8749
8750/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
8751
8752static bool
8753sparc_print_operand_punct_valid_p (unsigned char code)
8754{
8755  if (code == '#'
8756      || code == '*'
8757      || code == '('
8758      || code == ')'
8759      || code == '_'
8760      || code == '&')
8761    return true;
8762
8763  return false;
8764}
8765
8766/* Implement TARGET_PRINT_OPERAND.
8767   Print operand X (an rtx) in assembler syntax to file FILE.
8768   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8769   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
8770
8771static void
8772sparc_print_operand (FILE *file, rtx x, int code)
8773{
8774  switch (code)
8775    {
8776    case '#':
8777      /* Output an insn in a delay slot.  */
8778      if (final_sequence)
8779        sparc_indent_opcode = 1;
8780      else
8781	fputs ("\n\t nop", file);
8782      return;
8783    case '*':
8784      /* Output an annul flag if there's nothing for the delay slot and we
8785	 are optimizing.  This is always used with '(' below.
8786         Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8787	 this is a dbx bug.  So, we only do this when optimizing.
8788         On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8789	 Always emit a nop in case the next instruction is a branch.  */
8790      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8791	fputs (",a", file);
8792      return;
8793    case '(':
8794      /* Output a 'nop' if there's nothing for the delay slot and we are
8795	 not optimizing.  This is always used with '*' above.  */
8796      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8797	fputs ("\n\t nop", file);
8798      else if (final_sequence)
8799        sparc_indent_opcode = 1;
8800      return;
8801    case ')':
8802      /* Output the right displacement from the saved PC on function return.
8803	 The caller may have placed an "unimp" insn immediately after the call
8804	 so we have to account for it.  This insn is used in the 32-bit ABI
8805	 when calling a function that returns a non zero-sized structure.  The
8806	 64-bit ABI doesn't have it.  Be careful to have this test be the same
8807	 as that for the call.  The exception is when sparc_std_struct_return
8808	 is enabled, the psABI is followed exactly and the adjustment is made
8809	 by the code in sparc_struct_value_rtx.  The call emitted is the same
8810	 when sparc_std_struct_return is enabled. */
8811     if (!TARGET_ARCH64
8812	 && cfun->returns_struct
8813	 && !sparc_std_struct_return
8814	 && DECL_SIZE (DECL_RESULT (current_function_decl))
8815	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8816	     == INTEGER_CST
8817	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8818	fputs ("12", file);
8819      else
8820        fputc ('8', file);
8821      return;
8822    case '_':
8823      /* Output the Embedded Medium/Anywhere code model base register.  */
8824      fputs (EMBMEDANY_BASE_REG, file);
8825      return;
8826    case '&':
8827      /* Print some local dynamic TLS name.  */
8828      if (const char *name = get_some_local_dynamic_name ())
8829	assemble_name (file, name);
8830      else
8831	output_operand_lossage ("'%%&' used without any "
8832				"local dynamic TLS references");
8833      return;
8834
8835    case 'Y':
8836      /* Adjust the operand to take into account a RESTORE operation.  */
8837      if (GET_CODE (x) == CONST_INT)
8838	break;
8839      else if (GET_CODE (x) != REG)
8840	output_operand_lossage ("invalid %%Y operand");
8841      else if (REGNO (x) < 8)
8842	fputs (reg_names[REGNO (x)], file);
8843      else if (REGNO (x) >= 24 && REGNO (x) < 32)
8844	fputs (reg_names[REGNO (x)-16], file);
8845      else
8846	output_operand_lossage ("invalid %%Y operand");
8847      return;
8848    case 'L':
8849      /* Print out the low order register name of a register pair.  */
8850      if (WORDS_BIG_ENDIAN)
8851	fputs (reg_names[REGNO (x)+1], file);
8852      else
8853	fputs (reg_names[REGNO (x)], file);
8854      return;
8855    case 'H':
8856      /* Print out the high order register name of a register pair.  */
8857      if (WORDS_BIG_ENDIAN)
8858	fputs (reg_names[REGNO (x)], file);
8859      else
8860	fputs (reg_names[REGNO (x)+1], file);
8861      return;
8862    case 'R':
8863      /* Print out the second register name of a register pair or quad.
8864	 I.e., R (%o0) => %o1.  */
8865      fputs (reg_names[REGNO (x)+1], file);
8866      return;
8867    case 'S':
8868      /* Print out the third register name of a register quad.
8869	 I.e., S (%o0) => %o2.  */
8870      fputs (reg_names[REGNO (x)+2], file);
8871      return;
8872    case 'T':
8873      /* Print out the fourth register name of a register quad.
8874	 I.e., T (%o0) => %o3.  */
8875      fputs (reg_names[REGNO (x)+3], file);
8876      return;
8877    case 'x':
8878      /* Print a condition code register.  */
8879      if (REGNO (x) == SPARC_ICC_REG)
8880	{
8881	  /* We don't handle CC[X]_NOOVmode because they're not supposed
8882	     to occur here.  */
8883	  if (GET_MODE (x) == CCmode)
8884	    fputs ("%icc", file);
8885	  else if (GET_MODE (x) == CCXmode)
8886	    fputs ("%xcc", file);
8887	  else
8888	    gcc_unreachable ();
8889	}
8890      else
8891	/* %fccN register */
8892	fputs (reg_names[REGNO (x)], file);
8893      return;
8894    case 'm':
8895      /* Print the operand's address only.  */
8896      output_address (XEXP (x, 0));
8897      return;
8898    case 'r':
8899      /* In this case we need a register.  Use %g0 if the
8900	 operand is const0_rtx.  */
8901      if (x == const0_rtx
8902	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8903	{
8904	  fputs ("%g0", file);
8905	  return;
8906	}
8907      else
8908	break;
8909
8910    case 'A':
8911      switch (GET_CODE (x))
8912	{
8913	case IOR: fputs ("or", file); break;
8914	case AND: fputs ("and", file); break;
8915	case XOR: fputs ("xor", file); break;
8916	default: output_operand_lossage ("invalid %%A operand");
8917	}
8918      return;
8919
8920    case 'B':
8921      switch (GET_CODE (x))
8922	{
8923	case IOR: fputs ("orn", file); break;
8924	case AND: fputs ("andn", file); break;
8925	case XOR: fputs ("xnor", file); break;
8926	default: output_operand_lossage ("invalid %%B operand");
8927	}
8928      return;
8929
8930      /* This is used by the conditional move instructions.  */
8931    case 'C':
8932      {
8933	enum rtx_code rc = GET_CODE (x);
8934
8935	switch (rc)
8936	  {
8937	  case NE: fputs ("ne", file); break;
8938	  case EQ: fputs ("e", file); break;
8939	  case GE: fputs ("ge", file); break;
8940	  case GT: fputs ("g", file); break;
8941	  case LE: fputs ("le", file); break;
8942	  case LT: fputs ("l", file); break;
8943	  case GEU: fputs ("geu", file); break;
8944	  case GTU: fputs ("gu", file); break;
8945	  case LEU: fputs ("leu", file); break;
8946	  case LTU: fputs ("lu", file); break;
8947	  case LTGT: fputs ("lg", file); break;
8948	  case UNORDERED: fputs ("u", file); break;
8949	  case ORDERED: fputs ("o", file); break;
8950	  case UNLT: fputs ("ul", file); break;
8951	  case UNLE: fputs ("ule", file); break;
8952	  case UNGT: fputs ("ug", file); break;
8953	  case UNGE: fputs ("uge", file); break;
8954	  case UNEQ: fputs ("ue", file); break;
8955	  default: output_operand_lossage ("invalid %%C operand");
8956	  }
8957	return;
8958      }
8959
8960      /* This are used by the movr instruction pattern.  */
8961    case 'D':
8962      {
8963	enum rtx_code rc = GET_CODE (x);
8964	switch (rc)
8965	  {
8966	  case NE: fputs ("ne", file); break;
8967	  case EQ: fputs ("e", file); break;
8968	  case GE: fputs ("gez", file); break;
8969	  case LT: fputs ("lz", file); break;
8970	  case LE: fputs ("lez", file); break;
8971	  case GT: fputs ("gz", file); break;
8972	  default: output_operand_lossage ("invalid %%D operand");
8973	  }
8974	return;
8975      }
8976
8977    case 'b':
8978      {
8979	/* Print a sign-extended character.  */
8980	int i = trunc_int_for_mode (INTVAL (x), QImode);
8981	fprintf (file, "%d", i);
8982	return;
8983      }
8984
8985    case 'f':
8986      /* Operand must be a MEM; write its address.  */
8987      if (GET_CODE (x) != MEM)
8988	output_operand_lossage ("invalid %%f operand");
8989      output_address (XEXP (x, 0));
8990      return;
8991
8992    case 's':
8993      {
8994	/* Print a sign-extended 32-bit value.  */
8995	HOST_WIDE_INT i;
8996	if (GET_CODE(x) == CONST_INT)
8997	  i = INTVAL (x);
8998	else if (GET_CODE(x) == CONST_DOUBLE)
8999	  i = CONST_DOUBLE_LOW (x);
9000	else
9001	  {
9002	    output_operand_lossage ("invalid %%s operand");
9003	    return;
9004	  }
9005	i = trunc_int_for_mode (i, SImode);
9006	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9007	return;
9008      }
9009
9010    case 0:
9011      /* Do nothing special.  */
9012      break;
9013
9014    default:
9015      /* Undocumented flag.  */
9016      output_operand_lossage ("invalid operand output code");
9017    }
9018
9019  if (GET_CODE (x) == REG)
9020    fputs (reg_names[REGNO (x)], file);
9021  else if (GET_CODE (x) == MEM)
9022    {
9023      fputc ('[', file);
9024	/* Poor Sun assembler doesn't understand absolute addressing.  */
9025      if (CONSTANT_P (XEXP (x, 0)))
9026	fputs ("%g0+", file);
9027      output_address (XEXP (x, 0));
9028      fputc (']', file);
9029    }
9030  else if (GET_CODE (x) == HIGH)
9031    {
9032      fputs ("%hi(", file);
9033      output_addr_const (file, XEXP (x, 0));
9034      fputc (')', file);
9035    }
9036  else if (GET_CODE (x) == LO_SUM)
9037    {
9038      sparc_print_operand (file, XEXP (x, 0), 0);
9039      if (TARGET_CM_MEDMID)
9040	fputs ("+%l44(", file);
9041      else
9042	fputs ("+%lo(", file);
9043      output_addr_const (file, XEXP (x, 1));
9044      fputc (')', file);
9045    }
9046  else if (GET_CODE (x) == CONST_DOUBLE
9047	   && (GET_MODE (x) == VOIDmode
9048	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
9049    {
9050      if (CONST_DOUBLE_HIGH (x) == 0)
9051	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
9052      else if (CONST_DOUBLE_HIGH (x) == -1
9053	       && CONST_DOUBLE_LOW (x) < 0)
9054	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
9055      else
9056	output_operand_lossage ("long long constant not a valid immediate operand");
9057    }
9058  else if (GET_CODE (x) == CONST_DOUBLE)
9059    output_operand_lossage ("floating point constant not a valid immediate operand");
9060  else { output_addr_const (file, x); }
9061}
9062
9063/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
9064
9065static void
9066sparc_print_operand_address (FILE *file, rtx x)
9067{
9068  register rtx base, index = 0;
9069  int offset = 0;
9070  register rtx addr = x;
9071
9072  if (REG_P (addr))
9073    fputs (reg_names[REGNO (addr)], file);
9074  else if (GET_CODE (addr) == PLUS)
9075    {
9076      if (CONST_INT_P (XEXP (addr, 0)))
9077	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9078      else if (CONST_INT_P (XEXP (addr, 1)))
9079	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9080      else
9081	base = XEXP (addr, 0), index = XEXP (addr, 1);
9082      if (GET_CODE (base) == LO_SUM)
9083	{
9084	  gcc_assert (USE_AS_OFFSETABLE_LO10
9085		      && TARGET_ARCH64
9086		      && ! TARGET_CM_MEDMID);
9087	  output_operand (XEXP (base, 0), 0);
9088	  fputs ("+%lo(", file);
9089	  output_address (XEXP (base, 1));
9090	  fprintf (file, ")+%d", offset);
9091	}
9092      else
9093	{
9094	  fputs (reg_names[REGNO (base)], file);
9095	  if (index == 0)
9096	    fprintf (file, "%+d", offset);
9097	  else if (REG_P (index))
9098	    fprintf (file, "+%s", reg_names[REGNO (index)]);
9099	  else if (GET_CODE (index) == SYMBOL_REF
9100		   || GET_CODE (index) == LABEL_REF
9101		   || GET_CODE (index) == CONST)
9102	    fputc ('+', file), output_addr_const (file, index);
9103	  else gcc_unreachable ();
9104	}
9105    }
9106  else if (GET_CODE (addr) == MINUS
9107	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9108    {
9109      output_addr_const (file, XEXP (addr, 0));
9110      fputs ("-(", file);
9111      output_addr_const (file, XEXP (addr, 1));
9112      fputs ("-.)", file);
9113    }
9114  else if (GET_CODE (addr) == LO_SUM)
9115    {
9116      output_operand (XEXP (addr, 0), 0);
9117      if (TARGET_CM_MEDMID)
9118        fputs ("+%l44(", file);
9119      else
9120        fputs ("+%lo(", file);
9121      output_address (XEXP (addr, 1));
9122      fputc (')', file);
9123    }
9124  else if (flag_pic
9125	   && GET_CODE (addr) == CONST
9126	   && GET_CODE (XEXP (addr, 0)) == MINUS
9127	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9128	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9129	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9130    {
9131      addr = XEXP (addr, 0);
9132      output_addr_const (file, XEXP (addr, 0));
9133      /* Group the args of the second CONST in parenthesis.  */
9134      fputs ("-(", file);
9135      /* Skip past the second CONST--it does nothing for us.  */
9136      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9137      /* Close the parenthesis.  */
9138      fputc (')', file);
9139    }
9140  else
9141    {
9142      output_addr_const (file, addr);
9143    }
9144}
9145
9146/* Target hook for assembling integer objects.  The sparc version has
9147   special handling for aligned DI-mode objects.  */
9148
9149static bool
9150sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9151{
9152  /* ??? We only output .xword's for symbols and only then in environments
9153     where the assembler can handle them.  */
9154  if (aligned_p && size == 8
9155      && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9156    {
9157      if (TARGET_V9)
9158	{
9159	  assemble_integer_with_op ("\t.xword\t", x);
9160	  return true;
9161	}
9162      else
9163	{
9164	  assemble_aligned_integer (4, const0_rtx);
9165	  assemble_aligned_integer (4, x);
9166	  return true;
9167	}
9168    }
9169  return default_assemble_integer (x, size, aligned_p);
9170}
9171
9172/* Return the value of a code used in the .proc pseudo-op that says
9173   what kind of result this function returns.  For non-C types, we pick
9174   the closest C type.  */
9175
9176#ifndef SHORT_TYPE_SIZE
9177#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9178#endif
9179
9180#ifndef INT_TYPE_SIZE
9181#define INT_TYPE_SIZE BITS_PER_WORD
9182#endif
9183
9184#ifndef LONG_TYPE_SIZE
9185#define LONG_TYPE_SIZE BITS_PER_WORD
9186#endif
9187
9188#ifndef LONG_LONG_TYPE_SIZE
9189#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9190#endif
9191
9192#ifndef FLOAT_TYPE_SIZE
9193#define FLOAT_TYPE_SIZE BITS_PER_WORD
9194#endif
9195
9196#ifndef DOUBLE_TYPE_SIZE
9197#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9198#endif
9199
9200#ifndef LONG_DOUBLE_TYPE_SIZE
9201#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9202#endif
9203
9204unsigned long
9205sparc_type_code (register tree type)
9206{
9207  register unsigned long qualifiers = 0;
9208  register unsigned shift;
9209
9210  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
9211     setting more, since some assemblers will give an error for this.  Also,
9212     we must be careful to avoid shifts of 32 bits or more to avoid getting
9213     unpredictable results.  */
9214
9215  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9216    {
9217      switch (TREE_CODE (type))
9218	{
9219	case ERROR_MARK:
9220	  return qualifiers;
9221
9222	case ARRAY_TYPE:
9223	  qualifiers |= (3 << shift);
9224	  break;
9225
9226	case FUNCTION_TYPE:
9227	case METHOD_TYPE:
9228	  qualifiers |= (2 << shift);
9229	  break;
9230
9231	case POINTER_TYPE:
9232	case REFERENCE_TYPE:
9233	case OFFSET_TYPE:
9234	  qualifiers |= (1 << shift);
9235	  break;
9236
9237	case RECORD_TYPE:
9238	  return (qualifiers | 8);
9239
9240	case UNION_TYPE:
9241	case QUAL_UNION_TYPE:
9242	  return (qualifiers | 9);
9243
9244	case ENUMERAL_TYPE:
9245	  return (qualifiers | 10);
9246
9247	case VOID_TYPE:
9248	  return (qualifiers | 16);
9249
9250	case INTEGER_TYPE:
9251	  /* If this is a range type, consider it to be the underlying
9252	     type.  */
9253	  if (TREE_TYPE (type) != 0)
9254	    break;
9255
9256	  /* Carefully distinguish all the standard types of C,
9257	     without messing up if the language is not C.  We do this by
9258	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
9259	     look at both the names and the above fields, but that's redundant.
9260	     Any type whose size is between two C types will be considered
9261	     to be the wider of the two types.  Also, we do not have a
9262	     special code to use for "long long", so anything wider than
9263	     long is treated the same.  Note that we can't distinguish
9264	     between "int" and "long" in this code if they are the same
9265	     size, but that's fine, since neither can the assembler.  */
9266
9267	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9268	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9269
9270	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9271	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9272
9273	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9274	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9275
9276	  else
9277	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9278
9279	case REAL_TYPE:
9280	  /* If this is a range type, consider it to be the underlying
9281	     type.  */
9282	  if (TREE_TYPE (type) != 0)
9283	    break;
9284
9285	  /* Carefully distinguish all the standard types of C,
9286	     without messing up if the language is not C.  */
9287
9288	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9289	    return (qualifiers | 6);
9290
9291	  else
9292	    return (qualifiers | 7);
9293
9294	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
9295	  /* ??? We need to distinguish between double and float complex types,
9296	     but I don't know how yet because I can't reach this code from
9297	     existing front-ends.  */
9298	  return (qualifiers | 7);	/* Who knows? */
9299
9300	case VECTOR_TYPE:
9301	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
9302	case LANG_TYPE:
9303	case NULLPTR_TYPE:
9304	  return qualifiers;
9305
9306	default:
9307	  gcc_unreachable ();		/* Not a type! */
9308        }
9309    }
9310
9311  return qualifiers;
9312}
9313
9314/* Nested function support.  */
9315
9316/* Emit RTL insns to initialize the variable parts of a trampoline.
9317   FNADDR is an RTX for the address of the function's pure code.
9318   CXT is an RTX for the static chain value for the function.
9319
9320   This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9321   (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9322   (to store insns).  This is a bit excessive.  Perhaps a different
9323   mechanism would be better here.
9324
9325   Emit enough FLUSH insns to synchronize the data and instruction caches.  */
9326
9327static void
9328sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9329{
9330  /* SPARC 32-bit trampoline:
9331
9332 	sethi	%hi(fn), %g1
9333 	sethi	%hi(static), %g2
9334 	jmp	%g1+%lo(fn)
9335 	or	%g2, %lo(static), %g2
9336
9337    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9338    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9339   */
9340
9341  emit_move_insn
9342    (adjust_address (m_tramp, SImode, 0),
9343     expand_binop (SImode, ior_optab,
9344		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9345		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9346		   NULL_RTX, 1, OPTAB_DIRECT));
9347
9348  emit_move_insn
9349    (adjust_address (m_tramp, SImode, 4),
9350     expand_binop (SImode, ior_optab,
9351		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9352		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9353		   NULL_RTX, 1, OPTAB_DIRECT));
9354
9355  emit_move_insn
9356    (adjust_address (m_tramp, SImode, 8),
9357     expand_binop (SImode, ior_optab,
9358		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9359		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9360		   NULL_RTX, 1, OPTAB_DIRECT));
9361
9362  emit_move_insn
9363    (adjust_address (m_tramp, SImode, 12),
9364     expand_binop (SImode, ior_optab,
9365		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9366		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9367		   NULL_RTX, 1, OPTAB_DIRECT));
9368
9369  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
9370     aligned on a 16 byte boundary so one flush clears it all.  */
9371  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9372  if (sparc_cpu != PROCESSOR_ULTRASPARC
9373      && sparc_cpu != PROCESSOR_ULTRASPARC3
9374      && sparc_cpu != PROCESSOR_NIAGARA
9375      && sparc_cpu != PROCESSOR_NIAGARA2
9376      && sparc_cpu != PROCESSOR_NIAGARA3
9377      && sparc_cpu != PROCESSOR_NIAGARA4)
9378    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9379
9380  /* Call __enable_execute_stack after writing onto the stack to make sure
9381     the stack address is accessible.  */
9382#ifdef HAVE_ENABLE_EXECUTE_STACK
9383  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9384                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9385#endif
9386
9387}
9388
9389/* The 64-bit version is simpler because it makes more sense to load the
9390   values as "immediate" data out of the trampoline.  It's also easier since
9391   we can read the PC without clobbering a register.  */
9392
9393static void
9394sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9395{
9396  /* SPARC 64-bit trampoline:
9397
9398	rd	%pc, %g1
9399	ldx	[%g1+24], %g5
9400	jmp	%g5
9401	ldx	[%g1+16], %g5
9402	+16 bytes data
9403   */
9404
9405  emit_move_insn (adjust_address (m_tramp, SImode, 0),
9406		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9407  emit_move_insn (adjust_address (m_tramp, SImode, 4),
9408		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9409  emit_move_insn (adjust_address (m_tramp, SImode, 8),
9410		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9411  emit_move_insn (adjust_address (m_tramp, SImode, 12),
9412		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9413  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9414  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9415  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9416
9417  if (sparc_cpu != PROCESSOR_ULTRASPARC
9418      && sparc_cpu != PROCESSOR_ULTRASPARC3
9419      && sparc_cpu != PROCESSOR_NIAGARA
9420      && sparc_cpu != PROCESSOR_NIAGARA2
9421      && sparc_cpu != PROCESSOR_NIAGARA3
9422      && sparc_cpu != PROCESSOR_NIAGARA4)
9423    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9424
9425  /* Call __enable_execute_stack after writing onto the stack to make sure
9426     the stack address is accessible.  */
9427#ifdef HAVE_ENABLE_EXECUTE_STACK
9428  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9429                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9430#endif
9431}
9432
9433/* Worker for TARGET_TRAMPOLINE_INIT.  */
9434
9435static void
9436sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9437{
9438  rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9439  cxt = force_reg (Pmode, cxt);
9440  if (TARGET_ARCH64)
9441    sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9442  else
9443    sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9444}
9445
9446/* Adjust the cost of a scheduling dependency.  Return the new cost of
9447   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
9448
9449static int
9450supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9451{
9452  enum attr_type insn_type;
9453
9454  if (! recog_memoized (insn))
9455    return 0;
9456
9457  insn_type = get_attr_type (insn);
9458
9459  if (REG_NOTE_KIND (link) == 0)
9460    {
9461      /* Data dependency; DEP_INSN writes a register that INSN reads some
9462	 cycles later.  */
9463
9464      /* if a load, then the dependence must be on the memory address;
9465	 add an extra "cycle".  Note that the cost could be two cycles
9466	 if the reg was written late in an instruction group; we ca not tell
9467	 here.  */
9468      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9469	return cost + 3;
9470
9471      /* Get the delay only if the address of the store is the dependence.  */
9472      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9473	{
9474	  rtx pat = PATTERN(insn);
9475	  rtx dep_pat = PATTERN (dep_insn);
9476
9477	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9478	    return cost;  /* This should not happen!  */
9479
9480	  /* The dependency between the two instructions was on the data that
9481	     is being stored.  Assume that this implies that the address of the
9482	     store is not dependent.  */
9483	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9484	    return cost;
9485
9486	  return cost + 3;  /* An approximation.  */
9487	}
9488
9489      /* A shift instruction cannot receive its data from an instruction
9490	 in the same cycle; add a one cycle penalty.  */
9491      if (insn_type == TYPE_SHIFT)
9492	return cost + 3;   /* Split before cascade into shift.  */
9493    }
9494  else
9495    {
9496      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9497	 INSN writes some cycles later.  */
9498
9499      /* These are only significant for the fpu unit; writing a fp reg before
9500         the fpu has finished with it stalls the processor.  */
9501
9502      /* Reusing an integer register causes no problems.  */
9503      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9504	return 0;
9505    }
9506
9507  return cost;
9508}
9509
9510static int
9511hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9512{
9513  enum attr_type insn_type, dep_type;
9514  rtx pat = PATTERN(insn);
9515  rtx dep_pat = PATTERN (dep_insn);
9516
9517  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9518    return cost;
9519
9520  insn_type = get_attr_type (insn);
9521  dep_type = get_attr_type (dep_insn);
9522
9523  switch (REG_NOTE_KIND (link))
9524    {
9525    case 0:
9526      /* Data dependency; DEP_INSN writes a register that INSN reads some
9527	 cycles later.  */
9528
9529      switch (insn_type)
9530	{
9531	case TYPE_STORE:
9532	case TYPE_FPSTORE:
9533	  /* Get the delay iff the address of the store is the dependence.  */
9534	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9535	    return cost;
9536
9537	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9538	    return cost;
9539	  return cost + 3;
9540
9541	case TYPE_LOAD:
9542	case TYPE_SLOAD:
9543	case TYPE_FPLOAD:
9544	  /* If a load, then the dependence must be on the memory address.  If
9545	     the addresses aren't equal, then it might be a false dependency */
9546	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9547	    {
9548	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9549		  || GET_CODE (SET_DEST (dep_pat)) != MEM
9550		  || GET_CODE (SET_SRC (pat)) != MEM
9551		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9552				    XEXP (SET_SRC (pat), 0)))
9553		return cost + 2;
9554
9555	      return cost + 8;
9556	    }
9557	  break;
9558
9559	case TYPE_BRANCH:
9560	  /* Compare to branch latency is 0.  There is no benefit from
9561	     separating compare and branch.  */
9562	  if (dep_type == TYPE_COMPARE)
9563	    return 0;
9564	  /* Floating point compare to branch latency is less than
9565	     compare to conditional move.  */
9566	  if (dep_type == TYPE_FPCMP)
9567	    return cost - 1;
9568	  break;
9569	default:
9570	  break;
9571	}
9572	break;
9573
9574    case REG_DEP_ANTI:
9575      /* Anti-dependencies only penalize the fpu unit.  */
9576      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9577        return 0;
9578      break;
9579
9580    default:
9581      break;
9582    }
9583
9584  return cost;
9585}
9586
9587static int
9588sparc_adjust_cost(rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9589{
9590  switch (sparc_cpu)
9591    {
9592    case PROCESSOR_SUPERSPARC:
9593      cost = supersparc_adjust_cost (insn, link, dep, cost);
9594      break;
9595    case PROCESSOR_HYPERSPARC:
9596    case PROCESSOR_SPARCLITE86X:
9597      cost = hypersparc_adjust_cost (insn, link, dep, cost);
9598      break;
9599    default:
9600      break;
9601    }
9602  return cost;
9603}
9604
9605static void
9606sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9607		  int sched_verbose ATTRIBUTE_UNUSED,
9608		  int max_ready ATTRIBUTE_UNUSED)
9609{}
9610
9611static int
9612sparc_use_sched_lookahead (void)
9613{
9614  if (sparc_cpu == PROCESSOR_NIAGARA
9615      || sparc_cpu == PROCESSOR_NIAGARA2
9616      || sparc_cpu == PROCESSOR_NIAGARA3)
9617    return 0;
9618  if (sparc_cpu == PROCESSOR_NIAGARA4)
9619    return 2;
9620  if (sparc_cpu == PROCESSOR_ULTRASPARC
9621      || sparc_cpu == PROCESSOR_ULTRASPARC3)
9622    return 4;
9623  if ((1 << sparc_cpu) &
9624      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9625       (1 << PROCESSOR_SPARCLITE86X)))
9626    return 3;
9627  return 0;
9628}
9629
9630static int
9631sparc_issue_rate (void)
9632{
9633  switch (sparc_cpu)
9634    {
9635    case PROCESSOR_NIAGARA:
9636    case PROCESSOR_NIAGARA2:
9637    case PROCESSOR_NIAGARA3:
9638    default:
9639      return 1;
9640    case PROCESSOR_NIAGARA4:
9641    case PROCESSOR_V9:
9642      /* Assume V9 processors are capable of at least dual-issue.  */
9643      return 2;
9644    case PROCESSOR_SUPERSPARC:
9645      return 3;
9646    case PROCESSOR_HYPERSPARC:
9647    case PROCESSOR_SPARCLITE86X:
9648      return 2;
9649    case PROCESSOR_ULTRASPARC:
9650    case PROCESSOR_ULTRASPARC3:
9651      return 4;
9652    }
9653}
9654
9655static int
9656set_extends (rtx_insn *insn)
9657{
9658  register rtx pat = PATTERN (insn);
9659
9660  switch (GET_CODE (SET_SRC (pat)))
9661    {
9662      /* Load and some shift instructions zero extend.  */
9663    case MEM:
9664    case ZERO_EXTEND:
9665      /* sethi clears the high bits */
9666    case HIGH:
9667      /* LO_SUM is used with sethi.  sethi cleared the high
9668	 bits and the values used with lo_sum are positive */
9669    case LO_SUM:
9670      /* Store flag stores 0 or 1 */
9671    case LT: case LTU:
9672    case GT: case GTU:
9673    case LE: case LEU:
9674    case GE: case GEU:
9675    case EQ:
9676    case NE:
9677      return 1;
9678    case AND:
9679      {
9680	rtx op0 = XEXP (SET_SRC (pat), 0);
9681	rtx op1 = XEXP (SET_SRC (pat), 1);
9682	if (GET_CODE (op1) == CONST_INT)
9683	  return INTVAL (op1) >= 0;
9684	if (GET_CODE (op0) != REG)
9685	  return 0;
9686	if (sparc_check_64 (op0, insn) == 1)
9687	  return 1;
9688	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9689      }
9690    case IOR:
9691    case XOR:
9692      {
9693	rtx op0 = XEXP (SET_SRC (pat), 0);
9694	rtx op1 = XEXP (SET_SRC (pat), 1);
9695	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9696	  return 0;
9697	if (GET_CODE (op1) == CONST_INT)
9698	  return INTVAL (op1) >= 0;
9699	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9700      }
9701    case LSHIFTRT:
9702      return GET_MODE (SET_SRC (pat)) == SImode;
9703      /* Positive integers leave the high bits zero.  */
9704    case CONST_DOUBLE:
9705      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
9706    case CONST_INT:
9707      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
9708    case ASHIFTRT:
9709    case SIGN_EXTEND:
9710      return - (GET_MODE (SET_SRC (pat)) == SImode);
9711    case REG:
9712      return sparc_check_64 (SET_SRC (pat), insn);
9713    default:
9714      return 0;
9715    }
9716}
9717
9718/* We _ought_ to have only one kind per function, but...  */
9719static GTY(()) rtx sparc_addr_diff_list;
9720static GTY(()) rtx sparc_addr_list;
9721
9722void
9723sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9724{
9725  vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9726  if (diff)
9727    sparc_addr_diff_list
9728      = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9729  else
9730    sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9731}
9732
9733static void
9734sparc_output_addr_vec (rtx vec)
9735{
9736  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9737  int idx, vlen = XVECLEN (body, 0);
9738
9739#ifdef ASM_OUTPUT_ADDR_VEC_START
9740  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9741#endif
9742
9743#ifdef ASM_OUTPUT_CASE_LABEL
9744  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9745			 NEXT_INSN (lab));
9746#else
9747  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9748#endif
9749
9750  for (idx = 0; idx < vlen; idx++)
9751    {
9752      ASM_OUTPUT_ADDR_VEC_ELT
9753	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9754    }
9755
9756#ifdef ASM_OUTPUT_ADDR_VEC_END
9757  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9758#endif
9759}
9760
9761static void
9762sparc_output_addr_diff_vec (rtx vec)
9763{
9764  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9765  rtx base = XEXP (XEXP (body, 0), 0);
9766  int idx, vlen = XVECLEN (body, 1);
9767
9768#ifdef ASM_OUTPUT_ADDR_VEC_START
9769  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9770#endif
9771
9772#ifdef ASM_OUTPUT_CASE_LABEL
9773  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9774			 NEXT_INSN (lab));
9775#else
9776  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9777#endif
9778
9779  for (idx = 0; idx < vlen; idx++)
9780    {
9781      ASM_OUTPUT_ADDR_DIFF_ELT
9782        (asm_out_file,
9783         body,
9784         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9785         CODE_LABEL_NUMBER (base));
9786    }
9787
9788#ifdef ASM_OUTPUT_ADDR_VEC_END
9789  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9790#endif
9791}
9792
9793static void
9794sparc_output_deferred_case_vectors (void)
9795{
9796  rtx t;
9797  int align;
9798
9799  if (sparc_addr_list == NULL_RTX
9800      && sparc_addr_diff_list == NULL_RTX)
9801    return;
9802
9803  /* Align to cache line in the function's code section.  */
9804  switch_to_section (current_function_section ());
9805
9806  align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9807  if (align > 0)
9808    ASM_OUTPUT_ALIGN (asm_out_file, align);
9809
9810  for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9811    sparc_output_addr_vec (XEXP (t, 0));
9812  for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9813    sparc_output_addr_diff_vec (XEXP (t, 0));
9814
9815  sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9816}
9817
9818/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9819   unknown.  Return 1 if the high bits are zero, -1 if the register is
9820   sign extended.  */
9821int
9822sparc_check_64 (rtx x, rtx_insn *insn)
9823{
9824  /* If a register is set only once it is safe to ignore insns this
9825     code does not know how to handle.  The loop will either recognize
9826     the single set and return the correct value or fail to recognize
9827     it and return 0.  */
9828  int set_once = 0;
9829  rtx y = x;
9830
9831  gcc_assert (GET_CODE (x) == REG);
9832
9833  if (GET_MODE (x) == DImode)
9834    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9835
9836  if (flag_expensive_optimizations
9837      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9838    set_once = 1;
9839
9840  if (insn == 0)
9841    {
9842      if (set_once)
9843	insn = get_last_insn_anywhere ();
9844      else
9845	return 0;
9846    }
9847
9848  while ((insn = PREV_INSN (insn)))
9849    {
9850      switch (GET_CODE (insn))
9851	{
9852	case JUMP_INSN:
9853	case NOTE:
9854	  break;
9855	case CODE_LABEL:
9856	case CALL_INSN:
9857	default:
9858	  if (! set_once)
9859	    return 0;
9860	  break;
9861	case INSN:
9862	  {
9863	    rtx pat = PATTERN (insn);
9864	    if (GET_CODE (pat) != SET)
9865	      return 0;
9866	    if (rtx_equal_p (x, SET_DEST (pat)))
9867	      return set_extends (insn);
9868	    if (y && rtx_equal_p (y, SET_DEST (pat)))
9869	      return set_extends (insn);
9870	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9871	      return 0;
9872	  }
9873	}
9874    }
9875  return 0;
9876}
9877
9878/* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
9879   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
9880
9881const char *
9882output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9883{
9884  static char asm_code[60];
9885
9886  /* The scratch register is only required when the destination
9887     register is not a 64-bit global or out register.  */
9888  if (which_alternative != 2)
9889    operands[3] = operands[0];
9890
9891  /* We can only shift by constants <= 63. */
9892  if (GET_CODE (operands[2]) == CONST_INT)
9893    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9894
9895  if (GET_CODE (operands[1]) == CONST_INT)
9896    {
9897      output_asm_insn ("mov\t%1, %3", operands);
9898    }
9899  else
9900    {
9901      output_asm_insn ("sllx\t%H1, 32, %3", operands);
9902      if (sparc_check_64 (operands[1], insn) <= 0)
9903	output_asm_insn ("srl\t%L1, 0, %L1", operands);
9904      output_asm_insn ("or\t%L1, %3, %3", operands);
9905    }
9906
9907  strcpy (asm_code, opcode);
9908
9909  if (which_alternative != 2)
9910    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9911  else
9912    return
9913      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9914}
9915
9916/* Output rtl to increment the profiler label LABELNO
9917   for profiling a function entry.  */
9918
9919void
9920sparc_profile_hook (int labelno)
9921{
9922  char buf[32];
9923  rtx lab, fun;
9924
9925  fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9926  if (NO_PROFILE_COUNTERS)
9927    {
9928      emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9929    }
9930  else
9931    {
9932      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9933      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9934      emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9935    }
9936}
9937
9938#ifdef TARGET_SOLARIS
9939/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
9940
9941static void
9942sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9943				     tree decl ATTRIBUTE_UNUSED)
9944{
9945  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9946    {
9947      solaris_elf_asm_comdat_section (name, flags, decl);
9948      return;
9949    }
9950
9951  fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9952
9953  if (!(flags & SECTION_DEBUG))
9954    fputs (",#alloc", asm_out_file);
9955  if (flags & SECTION_WRITE)
9956    fputs (",#write", asm_out_file);
9957  if (flags & SECTION_TLS)
9958    fputs (",#tls", asm_out_file);
9959  if (flags & SECTION_CODE)
9960    fputs (",#execinstr", asm_out_file);
9961
9962  /* Sun as only supports #nobits/#progbits since Solaris 10.  */
9963  if (HAVE_AS_SPARC_NOBITS)
9964    {
9965      if (flags & SECTION_BSS)
9966	fputs (",#nobits", asm_out_file);
9967      else
9968	fputs (",#progbits", asm_out_file);
9969    }
9970
9971  fputc ('\n', asm_out_file);
9972}
9973#endif /* TARGET_SOLARIS */
9974
9975/* We do not allow indirect calls to be optimized into sibling calls.
9976
9977   We cannot use sibling calls when delayed branches are disabled
9978   because they will likely require the call delay slot to be filled.
9979
9980   Also, on SPARC 32-bit we cannot emit a sibling call when the
9981   current function returns a structure.  This is because the "unimp
9982   after call" convention would cause the callee to return to the
9983   wrong place.  The generic code already disallows cases where the
9984   function being called returns a structure.
9985
9986   It may seem strange how this last case could occur.  Usually there
9987   is code after the call which jumps to epilogue code which dumps the
9988   return value into the struct return area.  That ought to invalidate
9989   the sibling call right?  Well, in the C++ case we can end up passing
9990   the pointer to the struct return area to a constructor (which returns
9991   void) and then nothing else happens.  Such a sibling call would look
9992   valid without the added check here.
9993
9994   VxWorks PIC PLT entries require the global pointer to be initialized
9995   on entry.  We therefore can't emit sibling calls to them.  */
9996static bool
9997sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9998{
9999  return (decl
10000	  && flag_delayed_branch
10001	  && (TARGET_ARCH64 || ! cfun->returns_struct)
10002	  && !(TARGET_VXWORKS_RTP
10003	       && flag_pic
10004	       && !targetm.binds_local_p (decl)));
10005}
10006
10007/* libfunc renaming.  */
10008
10009static void
10010sparc_init_libfuncs (void)
10011{
10012  if (TARGET_ARCH32)
10013    {
10014      /* Use the subroutines that Sun's library provides for integer
10015	 multiply and divide.  The `*' prevents an underscore from
10016	 being prepended by the compiler. .umul is a little faster
10017	 than .mul.  */
10018      set_optab_libfunc (smul_optab, SImode, "*.umul");
10019      set_optab_libfunc (sdiv_optab, SImode, "*.div");
10020      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10021      set_optab_libfunc (smod_optab, SImode, "*.rem");
10022      set_optab_libfunc (umod_optab, SImode, "*.urem");
10023
10024      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
10025      set_optab_libfunc (add_optab, TFmode, "_Q_add");
10026      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10027      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10028      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10029      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10030
10031      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
10032	 is because with soft-float, the SFmode and DFmode sqrt
10033	 instructions will be absent, and the compiler will notice and
10034	 try to use the TFmode sqrt instruction for calls to the
10035	 builtin function sqrt, but this fails.  */
10036      if (TARGET_FPU)
10037	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10038
10039      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10040      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10041      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10042      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10043      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10044      set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10045
10046      set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
10047      set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
10048      set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
10049      set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
10050
10051      set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
10052      set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
10053      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10054      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10055
10056      if (DITF_CONVERSION_LIBFUNCS)
10057	{
10058	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
10059	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
10060	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10061	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10062	}
10063
10064      if (SUN_CONVERSION_LIBFUNCS)
10065	{
10066	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10067	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10068	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10069	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10070	}
10071    }
10072  if (TARGET_ARCH64)
10073    {
10074      /* In the SPARC 64bit ABI, SImode multiply and divide functions
10075	 do not exist in the library.  Make sure the compiler does not
10076	 emit calls to them by accident.  (It should always use the
10077         hardware instructions.)  */
10078      set_optab_libfunc (smul_optab, SImode, 0);
10079      set_optab_libfunc (sdiv_optab, SImode, 0);
10080      set_optab_libfunc (udiv_optab, SImode, 0);
10081      set_optab_libfunc (smod_optab, SImode, 0);
10082      set_optab_libfunc (umod_optab, SImode, 0);
10083
10084      if (SUN_INTEGER_MULTIPLY_64)
10085	{
10086	  set_optab_libfunc (smul_optab, DImode, "__mul64");
10087	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
10088	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10089	  set_optab_libfunc (smod_optab, DImode, "__rem64");
10090	  set_optab_libfunc (umod_optab, DImode, "__urem64");
10091	}
10092
10093      if (SUN_CONVERSION_LIBFUNCS)
10094	{
10095	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10096	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10097	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10098	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10099	}
10100    }
10101}
10102
10103/* SPARC builtins.  */
10104enum sparc_builtins
10105{
10106  /* FPU builtins.  */
10107  SPARC_BUILTIN_LDFSR,
10108  SPARC_BUILTIN_STFSR,
10109
10110  /* VIS 1.0 builtins.  */
10111  SPARC_BUILTIN_FPACK16,
10112  SPARC_BUILTIN_FPACK32,
10113  SPARC_BUILTIN_FPACKFIX,
10114  SPARC_BUILTIN_FEXPAND,
10115  SPARC_BUILTIN_FPMERGE,
10116  SPARC_BUILTIN_FMUL8X16,
10117  SPARC_BUILTIN_FMUL8X16AU,
10118  SPARC_BUILTIN_FMUL8X16AL,
10119  SPARC_BUILTIN_FMUL8SUX16,
10120  SPARC_BUILTIN_FMUL8ULX16,
10121  SPARC_BUILTIN_FMULD8SUX16,
10122  SPARC_BUILTIN_FMULD8ULX16,
10123  SPARC_BUILTIN_FALIGNDATAV4HI,
10124  SPARC_BUILTIN_FALIGNDATAV8QI,
10125  SPARC_BUILTIN_FALIGNDATAV2SI,
10126  SPARC_BUILTIN_FALIGNDATADI,
10127  SPARC_BUILTIN_WRGSR,
10128  SPARC_BUILTIN_RDGSR,
10129  SPARC_BUILTIN_ALIGNADDR,
10130  SPARC_BUILTIN_ALIGNADDRL,
10131  SPARC_BUILTIN_PDIST,
10132  SPARC_BUILTIN_EDGE8,
10133  SPARC_BUILTIN_EDGE8L,
10134  SPARC_BUILTIN_EDGE16,
10135  SPARC_BUILTIN_EDGE16L,
10136  SPARC_BUILTIN_EDGE32,
10137  SPARC_BUILTIN_EDGE32L,
10138  SPARC_BUILTIN_FCMPLE16,
10139  SPARC_BUILTIN_FCMPLE32,
10140  SPARC_BUILTIN_FCMPNE16,
10141  SPARC_BUILTIN_FCMPNE32,
10142  SPARC_BUILTIN_FCMPGT16,
10143  SPARC_BUILTIN_FCMPGT32,
10144  SPARC_BUILTIN_FCMPEQ16,
10145  SPARC_BUILTIN_FCMPEQ32,
10146  SPARC_BUILTIN_FPADD16,
10147  SPARC_BUILTIN_FPADD16S,
10148  SPARC_BUILTIN_FPADD32,
10149  SPARC_BUILTIN_FPADD32S,
10150  SPARC_BUILTIN_FPSUB16,
10151  SPARC_BUILTIN_FPSUB16S,
10152  SPARC_BUILTIN_FPSUB32,
10153  SPARC_BUILTIN_FPSUB32S,
10154  SPARC_BUILTIN_ARRAY8,
10155  SPARC_BUILTIN_ARRAY16,
10156  SPARC_BUILTIN_ARRAY32,
10157
10158  /* VIS 2.0 builtins.  */
10159  SPARC_BUILTIN_EDGE8N,
10160  SPARC_BUILTIN_EDGE8LN,
10161  SPARC_BUILTIN_EDGE16N,
10162  SPARC_BUILTIN_EDGE16LN,
10163  SPARC_BUILTIN_EDGE32N,
10164  SPARC_BUILTIN_EDGE32LN,
10165  SPARC_BUILTIN_BMASK,
10166  SPARC_BUILTIN_BSHUFFLEV4HI,
10167  SPARC_BUILTIN_BSHUFFLEV8QI,
10168  SPARC_BUILTIN_BSHUFFLEV2SI,
10169  SPARC_BUILTIN_BSHUFFLEDI,
10170
10171  /* VIS 3.0 builtins.  */
10172  SPARC_BUILTIN_CMASK8,
10173  SPARC_BUILTIN_CMASK16,
10174  SPARC_BUILTIN_CMASK32,
10175  SPARC_BUILTIN_FCHKSM16,
10176  SPARC_BUILTIN_FSLL16,
10177  SPARC_BUILTIN_FSLAS16,
10178  SPARC_BUILTIN_FSRL16,
10179  SPARC_BUILTIN_FSRA16,
10180  SPARC_BUILTIN_FSLL32,
10181  SPARC_BUILTIN_FSLAS32,
10182  SPARC_BUILTIN_FSRL32,
10183  SPARC_BUILTIN_FSRA32,
10184  SPARC_BUILTIN_PDISTN,
10185  SPARC_BUILTIN_FMEAN16,
10186  SPARC_BUILTIN_FPADD64,
10187  SPARC_BUILTIN_FPSUB64,
10188  SPARC_BUILTIN_FPADDS16,
10189  SPARC_BUILTIN_FPADDS16S,
10190  SPARC_BUILTIN_FPSUBS16,
10191  SPARC_BUILTIN_FPSUBS16S,
10192  SPARC_BUILTIN_FPADDS32,
10193  SPARC_BUILTIN_FPADDS32S,
10194  SPARC_BUILTIN_FPSUBS32,
10195  SPARC_BUILTIN_FPSUBS32S,
10196  SPARC_BUILTIN_FUCMPLE8,
10197  SPARC_BUILTIN_FUCMPNE8,
10198  SPARC_BUILTIN_FUCMPGT8,
10199  SPARC_BUILTIN_FUCMPEQ8,
10200  SPARC_BUILTIN_FHADDS,
10201  SPARC_BUILTIN_FHADDD,
10202  SPARC_BUILTIN_FHSUBS,
10203  SPARC_BUILTIN_FHSUBD,
10204  SPARC_BUILTIN_FNHADDS,
10205  SPARC_BUILTIN_FNHADDD,
10206  SPARC_BUILTIN_UMULXHI,
10207  SPARC_BUILTIN_XMULX,
10208  SPARC_BUILTIN_XMULXHI,
10209
10210  SPARC_BUILTIN_MAX
10211};
10212
10213static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10214static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10215
10216/* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
10217   function decl or NULL_TREE if the builtin was not added.  */
10218
10219static tree
10220def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10221	     tree type)
10222{
10223  tree t
10224    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10225
10226  if (t)
10227    {
10228      sparc_builtins[code] = t;
10229      sparc_builtins_icode[code] = icode;
10230    }
10231
10232  return t;
10233}
10234
10235/* Likewise, but also marks the function as "const".  */
10236
10237static tree
10238def_builtin_const (const char *name, enum insn_code icode,
10239		   enum sparc_builtins code, tree type)
10240{
10241  tree t = def_builtin (name, icode, code, type);
10242
10243  if (t)
10244    TREE_READONLY (t) = 1;
10245
10246  return t;
10247}
10248
10249/* Implement the TARGET_INIT_BUILTINS target hook.
10250   Create builtin functions for special SPARC instructions.  */
10251
10252static void
10253sparc_init_builtins (void)
10254{
10255  if (TARGET_FPU)
10256    sparc_fpu_init_builtins ();
10257
10258  if (TARGET_VIS)
10259    sparc_vis_init_builtins ();
10260}
10261
10262/* Create builtin functions for FPU instructions.  */
10263
10264static void
10265sparc_fpu_init_builtins (void)
10266{
10267  tree ftype
10268    = build_function_type_list (void_type_node,
10269				build_pointer_type (unsigned_type_node), 0);
10270  def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10271	       SPARC_BUILTIN_LDFSR, ftype);
10272  def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10273	       SPARC_BUILTIN_STFSR, ftype);
10274}
10275
10276/* Create builtin functions for VIS instructions.  */
10277
10278static void
10279sparc_vis_init_builtins (void)
10280{
10281  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10282  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10283  tree v4hi = build_vector_type (intHI_type_node, 4);
10284  tree v2hi = build_vector_type (intHI_type_node, 2);
10285  tree v2si = build_vector_type (intSI_type_node, 2);
10286  tree v1si = build_vector_type (intSI_type_node, 1);
10287
10288  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10289  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10290  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10291  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10292  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10293  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10294  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10295  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10296  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10297  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10298  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10299  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10300  tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10301  tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10302  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10303							 v8qi, v8qi,
10304							 intDI_type_node, 0);
10305  tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10306						      v8qi, v8qi, 0);
10307  tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10308						      v8qi, v8qi, 0);
10309  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10310						  intDI_type_node,
10311						  intDI_type_node, 0);
10312  tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10313						  intSI_type_node,
10314						  intSI_type_node, 0);
10315  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10316		        			    ptr_type_node,
10317					            intSI_type_node, 0);
10318  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10319		        			    ptr_type_node,
10320					            intDI_type_node, 0);
10321  tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10322		        			    ptr_type_node,
10323					            ptr_type_node, 0);
10324  tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10325		        			    ptr_type_node,
10326					            ptr_type_node, 0);
10327  tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10328						      v4hi, v4hi, 0);
10329  tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10330						      v2si, v2si, 0);
10331  tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10332						      v4hi, v4hi, 0);
10333  tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10334						      v2si, v2si, 0);
10335  tree void_ftype_di = build_function_type_list (void_type_node,
10336						 intDI_type_node, 0);
10337  tree di_ftype_void = build_function_type_list (intDI_type_node,
10338						 void_type_node, 0);
10339  tree void_ftype_si = build_function_type_list (void_type_node,
10340						 intSI_type_node, 0);
10341  tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10342						  float_type_node,
10343						  float_type_node, 0);
10344  tree df_ftype_df_df = build_function_type_list (double_type_node,
10345						  double_type_node,
10346						  double_type_node, 0);
10347
10348  /* Packing and expanding vectors.  */
10349  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10350	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10351  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10352	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10353  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10354	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10355  def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10356		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10357  def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10358		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10359
10360  /* Multiplications.  */
10361  def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10362		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10363  def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10364		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10365  def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10366		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10367  def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10368		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10369  def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10370		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10371  def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10372		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10373  def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10374		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10375
10376  /* Data aligning.  */
10377  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10378	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10379  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10380	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10381  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10382	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10383  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10384	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10385
10386  def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10387	       SPARC_BUILTIN_WRGSR, void_ftype_di);
10388  def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10389	       SPARC_BUILTIN_RDGSR, di_ftype_void);
10390
10391  if (TARGET_ARCH64)
10392    {
10393      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10394		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10395      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10396		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10397    }
10398  else
10399    {
10400      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10401		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10402      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10403		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10404    }
10405
10406  /* Pixel distance.  */
10407  def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10408		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10409
10410  /* Edge handling.  */
10411  if (TARGET_ARCH64)
10412    {
10413      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10414			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10415      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10416			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10417      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10418			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10419      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10420			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10421      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10422			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10423      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10424			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10425    }
10426  else
10427    {
10428      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10429			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10430      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10431			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10432      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10433			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10434      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10435			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10436      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10437			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10438      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10439			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10440    }
10441
10442  /* Pixel compare.  */
10443  if (TARGET_ARCH64)
10444    {
10445      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10446			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10447      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10448			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10449      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10450			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10451      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10452			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10453      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10454			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10455      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10456			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10457      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10458			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10459      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10460			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10461    }
10462  else
10463    {
10464      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10465			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10466      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10467			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10468      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10469			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10470      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10471			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10472      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10473			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10474      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10475			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10476      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10477			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10478      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10479			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10480    }
10481
10482  /* Addition and subtraction.  */
10483  def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10484		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10485  def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10486		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10487  def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10488		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10489  def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10490		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10491  def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10492		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10493  def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10494		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10495  def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10496		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10497  def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10498		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10499
10500  /* Three-dimensional array addressing.  */
10501  if (TARGET_ARCH64)
10502    {
10503      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10504			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10505      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10506			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10507      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10508			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10509    }
10510  else
10511    {
10512      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10513			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10514      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10515			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10516      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10517			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10518    }
10519
10520  if (TARGET_VIS2)
10521    {
10522      /* Edge handling.  */
10523      if (TARGET_ARCH64)
10524	{
10525	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10526			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10527	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10528			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10529	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10530			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10531	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10532			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10533	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10534			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10535	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10536			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10537	}
10538      else
10539	{
10540	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10541			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10542	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10543			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10544	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10545			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10546	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10547			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10548	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10549			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10550	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10551			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10552	}
10553
10554      /* Byte mask and shuffle.  */
10555      if (TARGET_ARCH64)
10556	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10557		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
10558      else
10559	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10560		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
10561      def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10562		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10563      def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10564		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10565      def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10566		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10567      def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10568		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10569    }
10570
10571  if (TARGET_VIS3)
10572    {
10573      if (TARGET_ARCH64)
10574	{
10575	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10576		       SPARC_BUILTIN_CMASK8, void_ftype_di);
10577	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10578		       SPARC_BUILTIN_CMASK16, void_ftype_di);
10579	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10580		       SPARC_BUILTIN_CMASK32, void_ftype_di);
10581	}
10582      else
10583	{
10584	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10585		       SPARC_BUILTIN_CMASK8, void_ftype_si);
10586	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10587		       SPARC_BUILTIN_CMASK16, void_ftype_si);
10588	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10589		       SPARC_BUILTIN_CMASK32, void_ftype_si);
10590	}
10591
10592      def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10593			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10594
10595      def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10596			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10597      def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10598			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10599      def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10600			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10601      def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10602			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10603      def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10604			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10605      def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10606			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10607      def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10608			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10609      def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10610			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10611
10612      if (TARGET_ARCH64)
10613	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10614			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10615      else
10616	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10617			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10618
10619      def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10620			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10621      def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10622			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10623      def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10624			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10625
10626      def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10627			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10628      def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10629			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10630      def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10631			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10632      def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10633			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10634      def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10635			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10636      def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10637			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10638      def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10639			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10640      def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10641			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10642
10643      if (TARGET_ARCH64)
10644	{
10645	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10646			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10647	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10648			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10649	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10650			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10651	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10652			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10653	}
10654      else
10655	{
10656	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10657			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10658	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10659			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10660	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10661			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10662	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10663			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10664	}
10665
10666      def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10667			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10668      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10669			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10670      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10671			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10672      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10673			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10674      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10675			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10676      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10677			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10678
10679      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10680			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10681      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10682			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10683      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10684			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10685    }
10686}
10687
10688/* Implement TARGET_BUILTIN_DECL hook.  */
10689
10690static tree
10691sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10692{
10693  if (code >= SPARC_BUILTIN_MAX)
10694    return error_mark_node;
10695
10696  return sparc_builtins[code];
10697}
10698
10699/* Implemented TARGET_EXPAND_BUILTIN hook.  */
10700
10701static rtx
10702sparc_expand_builtin (tree exp, rtx target,
10703		      rtx subtarget ATTRIBUTE_UNUSED,
10704		      machine_mode tmode ATTRIBUTE_UNUSED,
10705		      int ignore ATTRIBUTE_UNUSED)
10706{
10707  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10708  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10709  enum insn_code icode = sparc_builtins_icode[code];
10710  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10711  call_expr_arg_iterator iter;
10712  int arg_count = 0;
10713  rtx pat, op[4];
10714  tree arg;
10715
10716  if (nonvoid)
10717    {
10718      machine_mode tmode = insn_data[icode].operand[0].mode;
10719      if (!target
10720	  || GET_MODE (target) != tmode
10721	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10722	op[0] = gen_reg_rtx (tmode);
10723      else
10724	op[0] = target;
10725    }
10726
10727  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10728    {
10729      const struct insn_operand_data *insn_op;
10730      int idx;
10731
10732      if (arg == error_mark_node)
10733	return NULL_RTX;
10734
10735      arg_count++;
10736      idx = arg_count - !nonvoid;
10737      insn_op = &insn_data[icode].operand[idx];
10738      op[arg_count] = expand_normal (arg);
10739
10740      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10741	{
10742	  if (!address_operand (op[arg_count], SImode))
10743	    {
10744	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10745	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
10746	    }
10747	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10748	}
10749
10750      else if (insn_op->mode == V1DImode
10751	       && GET_MODE (op[arg_count]) == DImode)
10752	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10753
10754      else if (insn_op->mode == V1SImode
10755	       && GET_MODE (op[arg_count]) == SImode)
10756	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10757
10758      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10759							insn_op->mode))
10760	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10761    }
10762
10763  switch (arg_count)
10764    {
10765    case 0:
10766      pat = GEN_FCN (icode) (op[0]);
10767      break;
10768    case 1:
10769      if (nonvoid)
10770	pat = GEN_FCN (icode) (op[0], op[1]);
10771      else
10772	pat = GEN_FCN (icode) (op[1]);
10773      break;
10774    case 2:
10775      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10776      break;
10777    case 3:
10778      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10779      break;
10780    default:
10781      gcc_unreachable ();
10782    }
10783
10784  if (!pat)
10785    return NULL_RTX;
10786
10787  emit_insn (pat);
10788
10789  return (nonvoid ? op[0] : const0_rtx);
10790}
10791
10792/* Return the upper 16 bits of the 8x16 multiplication.  */
10793
10794static int
10795sparc_vis_mul8x16 (int e8, int e16)
10796{
10797  return (e8 * e16 + 128) / 256;
10798}
10799
10800/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10801   the result into the array N_ELTS, whose elements are of INNER_TYPE.  */
10802
10803static void
10804sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10805			  tree inner_type, tree cst0, tree cst1)
10806{
10807  unsigned i, num = VECTOR_CST_NELTS (cst0);
10808  int scale;
10809
10810  switch (fncode)
10811    {
10812    case SPARC_BUILTIN_FMUL8X16:
10813      for (i = 0; i < num; ++i)
10814	{
10815	  int val
10816	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10817				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10818	  n_elts[i] = build_int_cst (inner_type, val);
10819	}
10820      break;
10821
10822    case SPARC_BUILTIN_FMUL8X16AU:
10823      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10824
10825      for (i = 0; i < num; ++i)
10826	{
10827	  int val
10828	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10829				 scale);
10830	  n_elts[i] = build_int_cst (inner_type, val);
10831	}
10832      break;
10833
10834    case SPARC_BUILTIN_FMUL8X16AL:
10835      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10836
10837      for (i = 0; i < num; ++i)
10838	{
10839	  int val
10840	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10841				 scale);
10842	  n_elts[i] = build_int_cst (inner_type, val);
10843	}
10844      break;
10845
10846    default:
10847      gcc_unreachable ();
10848    }
10849}
10850
10851/* Implement TARGET_FOLD_BUILTIN hook.
10852
10853   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
10854   result of the function call is ignored.  NULL_TREE is returned if the
10855   function could not be folded.  */
10856
10857static tree
10858sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10859		    tree *args, bool ignore)
10860{
10861  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10862  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10863  tree arg0, arg1, arg2;
10864
10865  if (ignore)
10866    switch (code)
10867      {
10868      case SPARC_BUILTIN_LDFSR:
10869      case SPARC_BUILTIN_STFSR:
10870      case SPARC_BUILTIN_ALIGNADDR:
10871      case SPARC_BUILTIN_WRGSR:
10872      case SPARC_BUILTIN_BMASK:
10873      case SPARC_BUILTIN_CMASK8:
10874      case SPARC_BUILTIN_CMASK16:
10875      case SPARC_BUILTIN_CMASK32:
10876	break;
10877
10878      default:
10879	return build_zero_cst (rtype);
10880      }
10881
10882  switch (code)
10883    {
10884    case SPARC_BUILTIN_FEXPAND:
10885      arg0 = args[0];
10886      STRIP_NOPS (arg0);
10887
10888      if (TREE_CODE (arg0) == VECTOR_CST)
10889	{
10890	  tree inner_type = TREE_TYPE (rtype);
10891	  tree *n_elts;
10892	  unsigned i;
10893
10894	  n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10895	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10896	    n_elts[i] = build_int_cst (inner_type,
10897				       TREE_INT_CST_LOW
10898				         (VECTOR_CST_ELT (arg0, i)) << 4);
10899	  return build_vector (rtype, n_elts);
10900	}
10901      break;
10902
10903    case SPARC_BUILTIN_FMUL8X16:
10904    case SPARC_BUILTIN_FMUL8X16AU:
10905    case SPARC_BUILTIN_FMUL8X16AL:
10906      arg0 = args[0];
10907      arg1 = args[1];
10908      STRIP_NOPS (arg0);
10909      STRIP_NOPS (arg1);
10910
10911      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10912	{
10913	  tree inner_type = TREE_TYPE (rtype);
10914	  tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10915	  sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10916	  return build_vector (rtype, n_elts);
10917	}
10918      break;
10919
10920    case SPARC_BUILTIN_FPMERGE:
10921      arg0 = args[0];
10922      arg1 = args[1];
10923      STRIP_NOPS (arg0);
10924      STRIP_NOPS (arg1);
10925
10926      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10927	{
10928	  tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10929	  unsigned i;
10930	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10931	    {
10932	      n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10933	      n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10934	    }
10935
10936	  return build_vector (rtype, n_elts);
10937	}
10938      break;
10939
10940    case SPARC_BUILTIN_PDIST:
10941    case SPARC_BUILTIN_PDISTN:
10942      arg0 = args[0];
10943      arg1 = args[1];
10944      STRIP_NOPS (arg0);
10945      STRIP_NOPS (arg1);
10946      if (code == SPARC_BUILTIN_PDIST)
10947	{
10948	  arg2 = args[2];
10949	  STRIP_NOPS (arg2);
10950	}
10951      else
10952	arg2 = integer_zero_node;
10953
10954      if (TREE_CODE (arg0) == VECTOR_CST
10955	  && TREE_CODE (arg1) == VECTOR_CST
10956	  && TREE_CODE (arg2) == INTEGER_CST)
10957	{
10958	  bool overflow = false;
10959	  widest_int result = wi::to_widest (arg2);
10960	  widest_int tmp;
10961	  unsigned i;
10962
10963	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10964	    {
10965	      tree e0 = VECTOR_CST_ELT (arg0, i);
10966	      tree e1 = VECTOR_CST_ELT (arg1, i);
10967
10968	      bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10969
10970	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10971	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
10972	      if (wi::neg_p (tmp))
10973		tmp = wi::neg (tmp, &neg2_ovf);
10974	      else
10975		neg2_ovf = false;
10976	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
10977	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10978	    }
10979
10980	  gcc_assert (!overflow);
10981
10982	  return wide_int_to_tree (rtype, result);
10983	}
10984
10985    default:
10986      break;
10987    }
10988
10989  return NULL_TREE;
10990}
10991
10992/* ??? This duplicates information provided to the compiler by the
10993   ??? scheduler description.  Some day, teach genautomata to output
10994   ??? the latencies and then CSE will just use that.  */
10995
10996static bool
10997sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10998		 int *total, bool speed ATTRIBUTE_UNUSED)
10999{
11000  machine_mode mode = GET_MODE (x);
11001  bool float_mode_p = FLOAT_MODE_P (mode);
11002
11003  switch (code)
11004    {
11005    case CONST_INT:
11006      if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
11007	{
11008	  *total = 0;
11009	  return true;
11010	}
11011      /* FALLTHRU */
11012
11013    case HIGH:
11014      *total = 2;
11015      return true;
11016
11017    case CONST:
11018    case LABEL_REF:
11019    case SYMBOL_REF:
11020      *total = 4;
11021      return true;
11022
11023    case CONST_DOUBLE:
11024      if (GET_MODE (x) == VOIDmode
11025	  && ((CONST_DOUBLE_HIGH (x) == 0
11026	       && CONST_DOUBLE_LOW (x) < 0x1000)
11027	      || (CONST_DOUBLE_HIGH (x) == -1
11028		  && CONST_DOUBLE_LOW (x) < 0
11029		  && CONST_DOUBLE_LOW (x) >= -0x1000)))
11030	*total = 0;
11031      else
11032	*total = 8;
11033      return true;
11034
11035    case MEM:
11036      /* If outer-code was a sign or zero extension, a cost
11037	 of COSTS_N_INSNS (1) was already added in.  This is
11038	 why we are subtracting it back out.  */
11039      if (outer_code == ZERO_EXTEND)
11040	{
11041	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11042	}
11043      else if (outer_code == SIGN_EXTEND)
11044	{
11045	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11046	}
11047      else if (float_mode_p)
11048	{
11049	  *total = sparc_costs->float_load;
11050	}
11051      else
11052	{
11053	  *total = sparc_costs->int_load;
11054	}
11055
11056      return true;
11057
11058    case PLUS:
11059    case MINUS:
11060      if (float_mode_p)
11061	*total = sparc_costs->float_plusminus;
11062      else
11063	*total = COSTS_N_INSNS (1);
11064      return false;
11065
11066    case FMA:
11067      {
11068	rtx sub;
11069
11070	gcc_assert (float_mode_p);
11071	*total = sparc_costs->float_mul;
11072
11073	sub = XEXP (x, 0);
11074	if (GET_CODE (sub) == NEG)
11075	  sub = XEXP (sub, 0);
11076	*total += rtx_cost (sub, FMA, 0, speed);
11077
11078	sub = XEXP (x, 2);
11079	if (GET_CODE (sub) == NEG)
11080	  sub = XEXP (sub, 0);
11081	*total += rtx_cost (sub, FMA, 2, speed);
11082	return true;
11083      }
11084
11085    case MULT:
11086      if (float_mode_p)
11087	*total = sparc_costs->float_mul;
11088      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11089	*total = COSTS_N_INSNS (25);
11090      else
11091	{
11092	  int bit_cost;
11093
11094	  bit_cost = 0;
11095	  if (sparc_costs->int_mul_bit_factor)
11096	    {
11097	      int nbits;
11098
11099	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11100		{
11101		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11102		  for (nbits = 0; value != 0; value &= value - 1)
11103		    nbits++;
11104		}
11105	      else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11106		       && GET_MODE (XEXP (x, 1)) == VOIDmode)
11107		{
11108		  rtx x1 = XEXP (x, 1);
11109		  unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11110		  unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11111
11112		  for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11113		    nbits++;
11114		  for (; value2 != 0; value2 &= value2 - 1)
11115		    nbits++;
11116		}
11117	      else
11118		nbits = 7;
11119
11120	      if (nbits < 3)
11121		nbits = 3;
11122	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11123	      bit_cost = COSTS_N_INSNS (bit_cost);
11124	    }
11125
11126	  if (mode == DImode || !TARGET_HARD_MUL)
11127	    *total = sparc_costs->int_mulX + bit_cost;
11128	  else
11129	    *total = sparc_costs->int_mul + bit_cost;
11130	}
11131      return false;
11132
11133    case ASHIFT:
11134    case ASHIFTRT:
11135    case LSHIFTRT:
11136      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11137      return false;
11138
11139    case DIV:
11140    case UDIV:
11141    case MOD:
11142    case UMOD:
11143      if (float_mode_p)
11144	{
11145	  if (mode == DFmode)
11146	    *total = sparc_costs->float_div_df;
11147	  else
11148	    *total = sparc_costs->float_div_sf;
11149	}
11150      else
11151	{
11152	  if (mode == DImode)
11153	    *total = sparc_costs->int_divX;
11154	  else
11155	    *total = sparc_costs->int_div;
11156	}
11157      return false;
11158
11159    case NEG:
11160      if (! float_mode_p)
11161	{
11162	  *total = COSTS_N_INSNS (1);
11163	  return false;
11164	}
11165      /* FALLTHRU */
11166
11167    case ABS:
11168    case FLOAT:
11169    case UNSIGNED_FLOAT:
11170    case FIX:
11171    case UNSIGNED_FIX:
11172    case FLOAT_EXTEND:
11173    case FLOAT_TRUNCATE:
11174      *total = sparc_costs->float_move;
11175      return false;
11176
11177    case SQRT:
11178      if (mode == DFmode)
11179	*total = sparc_costs->float_sqrt_df;
11180      else
11181	*total = sparc_costs->float_sqrt_sf;
11182      return false;
11183
11184    case COMPARE:
11185      if (float_mode_p)
11186	*total = sparc_costs->float_cmp;
11187      else
11188	*total = COSTS_N_INSNS (1);
11189      return false;
11190
11191    case IF_THEN_ELSE:
11192      if (float_mode_p)
11193	*total = sparc_costs->float_cmove;
11194      else
11195	*total = sparc_costs->int_cmove;
11196      return false;
11197
11198    case IOR:
11199      /* Handle the NAND vector patterns.  */
11200      if (sparc_vector_mode_supported_p (GET_MODE (x))
11201	  && GET_CODE (XEXP (x, 0)) == NOT
11202	  && GET_CODE (XEXP (x, 1)) == NOT)
11203	{
11204	  *total = COSTS_N_INSNS (1);
11205	  return true;
11206	}
11207      else
11208        return false;
11209
11210    default:
11211      return false;
11212    }
11213}
11214
11215/* Return true if CLASS is either GENERAL_REGS or I64_REGS.  */
11216
11217static inline bool
11218general_or_i64_p (reg_class_t rclass)
11219{
11220  return (rclass == GENERAL_REGS || rclass == I64_REGS);
11221}
11222
11223/* Implement TARGET_REGISTER_MOVE_COST.  */
11224
11225static int
11226sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11227			  reg_class_t from, reg_class_t to)
11228{
11229  bool need_memory = false;
11230
11231  if (from == FPCC_REGS || to == FPCC_REGS)
11232    need_memory = true;
11233  else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11234	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11235    {
11236      if (TARGET_VIS3)
11237	{
11238	  int size = GET_MODE_SIZE (mode);
11239	  if (size == 8 || size == 4)
11240	    {
11241	      if (! TARGET_ARCH32 || size == 4)
11242		return 4;
11243	      else
11244		return 6;
11245	    }
11246	}
11247      need_memory = true;
11248    }
11249
11250  if (need_memory)
11251    {
11252      if (sparc_cpu == PROCESSOR_ULTRASPARC
11253	  || sparc_cpu == PROCESSOR_ULTRASPARC3
11254	  || sparc_cpu == PROCESSOR_NIAGARA
11255	  || sparc_cpu == PROCESSOR_NIAGARA2
11256	  || sparc_cpu == PROCESSOR_NIAGARA3
11257	  || sparc_cpu == PROCESSOR_NIAGARA4)
11258	return 12;
11259
11260      return 6;
11261    }
11262
11263  return 2;
11264}
11265
11266/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11267   This is achieved by means of a manual dynamic stack space allocation in
11268   the current frame.  We make the assumption that SEQ doesn't contain any
11269   function calls, with the possible exception of calls to the GOT helper.  */
11270
11271static void
11272emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11273{
11274  /* We must preserve the lowest 16 words for the register save area.  */
11275  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11276  /* We really need only 2 words of fresh stack space.  */
11277  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11278
11279  rtx slot
11280    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11281					     SPARC_STACK_BIAS + offset));
11282
11283  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11284  emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11285  if (reg2)
11286    emit_insn (gen_rtx_SET (VOIDmode,
11287			    adjust_address (slot, word_mode, UNITS_PER_WORD),
11288			    reg2));
11289  emit_insn (seq);
11290  if (reg2)
11291    emit_insn (gen_rtx_SET (VOIDmode,
11292			    reg2,
11293			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
11294  emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11295  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11296}
11297
11298/* Output the assembler code for a thunk function.  THUNK_DECL is the
11299   declaration for the thunk function itself, FUNCTION is the decl for
11300   the target function.  DELTA is an immediate constant offset to be
11301   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
11302   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */
11303
11304static void
11305sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11306		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11307		       tree function)
11308{
11309  rtx this_rtx, funexp;
11310  rtx_insn *insn;
11311  unsigned int int_arg_first;
11312
11313  reload_completed = 1;
11314  epilogue_completed = 1;
11315
11316  emit_note (NOTE_INSN_PROLOGUE_END);
11317
11318  if (TARGET_FLAT)
11319    {
11320      sparc_leaf_function_p = 1;
11321
11322      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11323    }
11324  else if (flag_delayed_branch)
11325    {
11326      /* We will emit a regular sibcall below, so we need to instruct
11327	 output_sibcall that we are in a leaf function.  */
11328      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11329
11330      /* This will cause final.c to invoke leaf_renumber_regs so we
11331	 must behave as if we were in a not-yet-leafified function.  */
11332      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11333    }
11334  else
11335    {
11336      /* We will emit the sibcall manually below, so we will need to
11337	 manually spill non-leaf registers.  */
11338      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11339
11340      /* We really are in a leaf function.  */
11341      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11342    }
11343
11344  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
11345     returns a structure, the structure return pointer is there instead.  */
11346  if (TARGET_ARCH64
11347      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11348    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11349  else
11350    this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11351
11352  /* Add DELTA.  When possible use a plain add, otherwise load it into
11353     a register first.  */
11354  if (delta)
11355    {
11356      rtx delta_rtx = GEN_INT (delta);
11357
11358      if (! SPARC_SIMM13_P (delta))
11359	{
11360	  rtx scratch = gen_rtx_REG (Pmode, 1);
11361	  emit_move_insn (scratch, delta_rtx);
11362	  delta_rtx = scratch;
11363	}
11364
11365      /* THIS_RTX += DELTA.  */
11366      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11367    }
11368
11369  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
11370  if (vcall_offset)
11371    {
11372      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11373      rtx scratch = gen_rtx_REG (Pmode, 1);
11374
11375      gcc_assert (vcall_offset < 0);
11376
11377      /* SCRATCH = *THIS_RTX.  */
11378      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11379
11380      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
11381	 may not have any available scratch register at this point.  */
11382      if (SPARC_SIMM13_P (vcall_offset))
11383	;
11384      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
11385      else if (! fixed_regs[5]
11386	       /* The below sequence is made up of at least 2 insns,
11387		  while the default method may need only one.  */
11388	       && vcall_offset < -8192)
11389	{
11390	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
11391	  emit_move_insn (scratch2, vcall_offset_rtx);
11392	  vcall_offset_rtx = scratch2;
11393	}
11394      else
11395	{
11396	  rtx increment = GEN_INT (-4096);
11397
11398	  /* VCALL_OFFSET is a negative number whose typical range can be
11399	     estimated as -32768..0 in 32-bit mode.  In almost all cases
11400	     it is therefore cheaper to emit multiple add insns than
11401	     spilling and loading the constant into a register (at least
11402	     6 insns).  */
11403	  while (! SPARC_SIMM13_P (vcall_offset))
11404	    {
11405	      emit_insn (gen_add2_insn (scratch, increment));
11406	      vcall_offset += 4096;
11407	    }
11408	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11409	}
11410
11411      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
11412      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11413					    gen_rtx_PLUS (Pmode,
11414							  scratch,
11415							  vcall_offset_rtx)));
11416
11417      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
11418      emit_insn (gen_add2_insn (this_rtx, scratch));
11419    }
11420
11421  /* Generate a tail call to the target function.  */
11422  if (! TREE_USED (function))
11423    {
11424      assemble_external (function);
11425      TREE_USED (function) = 1;
11426    }
11427  funexp = XEXP (DECL_RTL (function), 0);
11428
11429  if (flag_delayed_branch)
11430    {
11431      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11432      insn = emit_call_insn (gen_sibcall (funexp));
11433      SIBLING_CALL_P (insn) = 1;
11434    }
11435  else
11436    {
11437      /* The hoops we have to jump through in order to generate a sibcall
11438	 without using delay slots...  */
11439      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11440
11441      if (flag_pic)
11442        {
11443	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
11444	  start_sequence ();
11445	  load_got_register ();  /* clobbers %o7 */
11446	  scratch = sparc_legitimize_pic_address (funexp, scratch);
11447	  seq = get_insns ();
11448	  end_sequence ();
11449	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11450	}
11451      else if (TARGET_ARCH32)
11452	{
11453	  emit_insn (gen_rtx_SET (VOIDmode,
11454				  scratch,
11455				  gen_rtx_HIGH (SImode, funexp)));
11456	  emit_insn (gen_rtx_SET (VOIDmode,
11457				  scratch,
11458				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
11459	}
11460      else  /* TARGET_ARCH64 */
11461        {
11462	  switch (sparc_cmodel)
11463	    {
11464	    case CM_MEDLOW:
11465	    case CM_MEDMID:
11466	      /* The destination can serve as a temporary.  */
11467	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11468	      break;
11469
11470	    case CM_MEDANY:
11471	    case CM_EMBMEDANY:
11472	      /* The destination cannot serve as a temporary.  */
11473	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
11474	      start_sequence ();
11475	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11476	      seq = get_insns ();
11477	      end_sequence ();
11478	      emit_and_preserve (seq, spill_reg, 0);
11479	      break;
11480
11481	    default:
11482	      gcc_unreachable ();
11483	    }
11484	}
11485
11486      emit_jump_insn (gen_indirect_jump (scratch));
11487    }
11488
11489  emit_barrier ();
11490
11491  /* Run just enough of rest_of_compilation to get the insns emitted.
11492     There's not really enough bulk here to make other passes such as
11493     instruction scheduling worth while.  Note that use_thunk calls
11494     assemble_start_function and assemble_end_function.  */
11495  insn = get_insns ();
11496  shorten_branches (insn);
11497  final_start_function (insn, file, 1);
11498  final (insn, file, 1);
11499  final_end_function ();
11500
11501  reload_completed = 0;
11502  epilogue_completed = 0;
11503}
11504
11505/* Return true if sparc_output_mi_thunk would be able to output the
11506   assembler code for the thunk function specified by the arguments
11507   it is passed, and false otherwise.  */
11508static bool
11509sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11510			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11511			   HOST_WIDE_INT vcall_offset,
11512			   const_tree function ATTRIBUTE_UNUSED)
11513{
11514  /* Bound the loop used in the default method above.  */
11515  return (vcall_offset >= -32768 || ! fixed_regs[5]);
11516}
11517
11518/* How to allocate a 'struct machine_function'.  */
11519
11520static struct machine_function *
11521sparc_init_machine_status (void)
11522{
11523  return ggc_cleared_alloc<machine_function> ();
11524}
11525
11526/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11527   We need to emit DTP-relative relocations.  */
11528
11529static void
11530sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11531{
11532  switch (size)
11533    {
11534    case 4:
11535      fputs ("\t.word\t%r_tls_dtpoff32(", file);
11536      break;
11537    case 8:
11538      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11539      break;
11540    default:
11541      gcc_unreachable ();
11542    }
11543  output_addr_const (file, x);
11544  fputs (")", file);
11545}
11546
11547/* Do whatever processing is required at the end of a file.  */
11548
11549static void
11550sparc_file_end (void)
11551{
11552  /* If we need to emit the special GOT helper function, do so now.  */
11553  if (got_helper_rtx)
11554    {
11555      const char *name = XSTR (got_helper_rtx, 0);
11556      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11557#ifdef DWARF2_UNWIND_INFO
11558      bool do_cfi;
11559#endif
11560
11561      if (USE_HIDDEN_LINKONCE)
11562	{
11563	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11564				  get_identifier (name),
11565				  build_function_type_list (void_type_node,
11566                                                            NULL_TREE));
11567	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11568					   NULL_TREE, void_type_node);
11569	  TREE_PUBLIC (decl) = 1;
11570	  TREE_STATIC (decl) = 1;
11571	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11572	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11573	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
11574	  resolve_unique_section (decl, 0, flag_function_sections);
11575	  allocate_struct_function (decl, true);
11576	  cfun->is_thunk = 1;
11577	  current_function_decl = decl;
11578	  init_varasm_status ();
11579	  assemble_start_function (decl, name);
11580	}
11581      else
11582	{
11583	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11584          switch_to_section (text_section);
11585	  if (align > 0)
11586	    ASM_OUTPUT_ALIGN (asm_out_file, align);
11587	  ASM_OUTPUT_LABEL (asm_out_file, name);
11588	}
11589
11590#ifdef DWARF2_UNWIND_INFO
11591      do_cfi = dwarf2out_do_cfi_asm ();
11592      if (do_cfi)
11593	fprintf (asm_out_file, "\t.cfi_startproc\n");
11594#endif
11595      if (flag_delayed_branch)
11596	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11597		 reg_name, reg_name);
11598      else
11599	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11600		 reg_name, reg_name);
11601#ifdef DWARF2_UNWIND_INFO
11602      if (do_cfi)
11603	fprintf (asm_out_file, "\t.cfi_endproc\n");
11604#endif
11605    }
11606
11607  if (NEED_INDICATE_EXEC_STACK)
11608    file_end_indicate_exec_stack ();
11609
11610#ifdef TARGET_SOLARIS
11611  solaris_file_end ();
11612#endif
11613}
11614
11615#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11616/* Implement TARGET_MANGLE_TYPE.  */
11617
11618static const char *
11619sparc_mangle_type (const_tree type)
11620{
11621  if (!TARGET_64BIT
11622      && TYPE_MAIN_VARIANT (type) == long_double_type_node
11623      && TARGET_LONG_DOUBLE_128)
11624    return "g";
11625
11626  /* For all other types, use normal C++ mangling.  */
11627  return NULL;
11628}
11629#endif
11630
11631/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
11632   and BEFORE_AFTER arguments of the form X_Y.  They are two-bit masks where
11633   bit 0 indicates that X is true, and bit 1 indicates Y is true.  */
11634
11635void
11636sparc_emit_membar_for_model (enum memmodel model,
11637			     int load_store, int before_after)
11638{
11639  /* Bits for the MEMBAR mmask field.  */
11640  const int LoadLoad = 1;
11641  const int StoreLoad = 2;
11642  const int LoadStore = 4;
11643  const int StoreStore = 8;
11644
11645  int mm = 0, implied = 0;
11646
11647  switch (sparc_memory_model)
11648    {
11649    case SMM_SC:
11650      /* Sequential Consistency.  All memory transactions are immediately
11651	 visible in sequential execution order.  No barriers needed.  */
11652      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11653      break;
11654
11655    case SMM_TSO:
11656      /* Total Store Ordering: all memory transactions with store semantics
11657	 are followed by an implied StoreStore.  */
11658      implied |= StoreStore;
11659
11660      /* If we're not looking for a raw barrer (before+after), then atomic
11661	 operations get the benefit of being both load and store.  */
11662      if (load_store == 3 && before_after == 1)
11663	implied |= StoreLoad;
11664      /* FALLTHRU */
11665
11666    case SMM_PSO:
11667      /* Partial Store Ordering: all memory transactions with load semantics
11668	 are followed by an implied LoadLoad | LoadStore.  */
11669      implied |= LoadLoad | LoadStore;
11670
11671      /* If we're not looking for a raw barrer (before+after), then atomic
11672	 operations get the benefit of being both load and store.  */
11673      if (load_store == 3 && before_after == 2)
11674	implied |= StoreLoad | StoreStore;
11675      /* FALLTHRU */
11676
11677    case SMM_RMO:
11678      /* Relaxed Memory Ordering: no implicit bits.  */
11679      break;
11680
11681    default:
11682      gcc_unreachable ();
11683    }
11684
11685  if (before_after & 1)
11686    {
11687      if (is_mm_release (model) || is_mm_acq_rel (model)
11688	  || is_mm_seq_cst (model))
11689	{
11690	  if (load_store & 1)
11691	    mm |= LoadLoad | StoreLoad;
11692	  if (load_store & 2)
11693	    mm |= LoadStore | StoreStore;
11694	}
11695    }
11696  if (before_after & 2)
11697    {
11698      if (is_mm_acquire (model) || is_mm_acq_rel (model)
11699	  || is_mm_seq_cst (model))
11700	{
11701	  if (load_store & 1)
11702	    mm |= LoadLoad | LoadStore;
11703	  if (load_store & 2)
11704	    mm |= StoreLoad | StoreStore;
11705	}
11706    }
11707
11708  /* Remove the bits implied by the system memory model.  */
11709  mm &= ~implied;
11710
11711  /* For raw barriers (before+after), always emit a barrier.
11712     This will become a compile-time barrier if needed.  */
11713  if (mm || before_after == 3)
11714    emit_insn (gen_membar (GEN_INT (mm)));
11715}
11716
11717/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11718   compare and swap on the word containing the byte or half-word.  */
11719
11720static void
11721sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11722				  rtx oldval, rtx newval)
11723{
11724  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11725  rtx addr = gen_reg_rtx (Pmode);
11726  rtx off = gen_reg_rtx (SImode);
11727  rtx oldv = gen_reg_rtx (SImode);
11728  rtx newv = gen_reg_rtx (SImode);
11729  rtx oldvalue = gen_reg_rtx (SImode);
11730  rtx newvalue = gen_reg_rtx (SImode);
11731  rtx res = gen_reg_rtx (SImode);
11732  rtx resv = gen_reg_rtx (SImode);
11733  rtx memsi, val, mask, cc;
11734
11735  emit_insn (gen_rtx_SET (VOIDmode, addr,
11736			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11737
11738  if (Pmode != SImode)
11739    addr1 = gen_lowpart (SImode, addr1);
11740  emit_insn (gen_rtx_SET (VOIDmode, off,
11741			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11742
11743  memsi = gen_rtx_MEM (SImode, addr);
11744  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11745  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11746
11747  val = copy_to_reg (memsi);
11748
11749  emit_insn (gen_rtx_SET (VOIDmode, off,
11750			  gen_rtx_XOR (SImode, off,
11751				       GEN_INT (GET_MODE (mem) == QImode
11752						? 3 : 2))));
11753
11754  emit_insn (gen_rtx_SET (VOIDmode, off,
11755			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11756
11757  if (GET_MODE (mem) == QImode)
11758    mask = force_reg (SImode, GEN_INT (0xff));
11759  else
11760    mask = force_reg (SImode, GEN_INT (0xffff));
11761
11762  emit_insn (gen_rtx_SET (VOIDmode, mask,
11763			  gen_rtx_ASHIFT (SImode, mask, off)));
11764
11765  emit_insn (gen_rtx_SET (VOIDmode, val,
11766			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11767				       val)));
11768
11769  oldval = gen_lowpart (SImode, oldval);
11770  emit_insn (gen_rtx_SET (VOIDmode, oldv,
11771			  gen_rtx_ASHIFT (SImode, oldval, off)));
11772
11773  newval = gen_lowpart_common (SImode, newval);
11774  emit_insn (gen_rtx_SET (VOIDmode, newv,
11775			  gen_rtx_ASHIFT (SImode, newval, off)));
11776
11777  emit_insn (gen_rtx_SET (VOIDmode, oldv,
11778			  gen_rtx_AND (SImode, oldv, mask)));
11779
11780  emit_insn (gen_rtx_SET (VOIDmode, newv,
11781			  gen_rtx_AND (SImode, newv, mask)));
11782
11783  rtx_code_label *end_label = gen_label_rtx ();
11784  rtx_code_label *loop_label = gen_label_rtx ();
11785  emit_label (loop_label);
11786
11787  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11788			  gen_rtx_IOR (SImode, oldv, val)));
11789
11790  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11791			  gen_rtx_IOR (SImode, newv, val)));
11792
11793  emit_move_insn (bool_result, const1_rtx);
11794
11795  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11796
11797  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11798
11799  emit_insn (gen_rtx_SET (VOIDmode, resv,
11800			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11801				       res)));
11802
11803  emit_move_insn (bool_result, const0_rtx);
11804
11805  cc = gen_compare_reg_1 (NE, resv, val);
11806  emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11807
11808  /* Use cbranchcc4 to separate the compare and branch!  */
11809  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11810				  cc, const0_rtx, loop_label));
11811
11812  emit_label (end_label);
11813
11814  emit_insn (gen_rtx_SET (VOIDmode, res,
11815			  gen_rtx_AND (SImode, res, mask)));
11816
11817  emit_insn (gen_rtx_SET (VOIDmode, res,
11818			  gen_rtx_LSHIFTRT (SImode, res, off)));
11819
11820  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11821}
11822
11823/* Expand code to perform a compare-and-swap.  */
11824
11825void
11826sparc_expand_compare_and_swap (rtx operands[])
11827{
11828  rtx bval, retval, mem, oldval, newval;
11829  machine_mode mode;
11830  enum memmodel model;
11831
11832  bval = operands[0];
11833  retval = operands[1];
11834  mem = operands[2];
11835  oldval = operands[3];
11836  newval = operands[4];
11837  model = (enum memmodel) INTVAL (operands[6]);
11838  mode = GET_MODE (mem);
11839
11840  sparc_emit_membar_for_model (model, 3, 1);
11841
11842  if (reg_overlap_mentioned_p (retval, oldval))
11843    oldval = copy_to_reg (oldval);
11844
11845  if (mode == QImode || mode == HImode)
11846    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11847  else
11848    {
11849      rtx (*gen) (rtx, rtx, rtx, rtx);
11850      rtx x;
11851
11852      if (mode == SImode)
11853	gen = gen_atomic_compare_and_swapsi_1;
11854      else
11855	gen = gen_atomic_compare_and_swapdi_1;
11856      emit_insn (gen (retval, mem, oldval, newval));
11857
11858      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11859      if (x != bval)
11860	convert_move (bval, x, 1);
11861    }
11862
11863  sparc_emit_membar_for_model (model, 3, 2);
11864}
11865
11866void
11867sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
11868{
11869  rtx t_1, t_2, t_3;
11870
11871  sel = gen_lowpart (DImode, sel);
11872  switch (vmode)
11873    {
11874    case V2SImode:
11875      /* inp = xxxxxxxAxxxxxxxB */
11876      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11877				 NULL_RTX, 1, OPTAB_DIRECT);
11878      /* t_1 = ....xxxxxxxAxxx. */
11879      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11880				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11881      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11882				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11883      /* sel = .......B */
11884      /* t_1 = ...A.... */
11885      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11886      /* sel = ...A...B */
11887      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11888      /* sel = AAAABBBB * 4 */
11889      t_1 = force_reg (SImode, GEN_INT (0x01230123));
11890      /* sel = { A*4, A*4+1, A*4+2, ... } */
11891      break;
11892
11893    case V4HImode:
11894      /* inp = xxxAxxxBxxxCxxxD */
11895      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11896				 NULL_RTX, 1, OPTAB_DIRECT);
11897      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11898				 NULL_RTX, 1, OPTAB_DIRECT);
11899      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11900				 NULL_RTX, 1, OPTAB_DIRECT);
11901      /* t_1 = ..xxxAxxxBxxxCxx */
11902      /* t_2 = ....xxxAxxxBxxxC */
11903      /* t_3 = ......xxxAxxxBxx */
11904      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11905				 GEN_INT (0x07),
11906				 NULL_RTX, 1, OPTAB_DIRECT);
11907      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11908				 GEN_INT (0x0700),
11909				 NULL_RTX, 1, OPTAB_DIRECT);
11910      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11911				 GEN_INT (0x070000),
11912				 NULL_RTX, 1, OPTAB_DIRECT);
11913      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11914				 GEN_INT (0x07000000),
11915				 NULL_RTX, 1, OPTAB_DIRECT);
11916      /* sel = .......D */
11917      /* t_1 = .....C.. */
11918      /* t_2 = ...B.... */
11919      /* t_3 = .A...... */
11920      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11921      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11922      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11923      /* sel = .A.B.C.D */
11924      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11925      /* sel = AABBCCDD * 2 */
11926      t_1 = force_reg (SImode, GEN_INT (0x01010101));
11927      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11928      break;
11929
11930    case V8QImode:
11931      /* input = xAxBxCxDxExFxGxH */
11932      sel = expand_simple_binop (DImode, AND, sel,
11933				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11934					  | 0x0f0f0f0f),
11935				 NULL_RTX, 1, OPTAB_DIRECT);
11936      /* sel = .A.B.C.D.E.F.G.H */
11937      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11938				 NULL_RTX, 1, OPTAB_DIRECT);
11939      /* t_1 = ..A.B.C.D.E.F.G. */
11940      sel = expand_simple_binop (DImode, IOR, sel, t_1,
11941				 NULL_RTX, 1, OPTAB_DIRECT);
11942      /* sel = .AABBCCDDEEFFGGH */
11943      sel = expand_simple_binop (DImode, AND, sel,
11944				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11945					  | 0xff00ff),
11946				 NULL_RTX, 1, OPTAB_DIRECT);
11947      /* sel = ..AB..CD..EF..GH */
11948      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11949				 NULL_RTX, 1, OPTAB_DIRECT);
11950      /* t_1 = ....AB..CD..EF.. */
11951      sel = expand_simple_binop (DImode, IOR, sel, t_1,
11952				 NULL_RTX, 1, OPTAB_DIRECT);
11953      /* sel = ..ABABCDCDEFEFGH */
11954      sel = expand_simple_binop (DImode, AND, sel,
11955				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11956				 NULL_RTX, 1, OPTAB_DIRECT);
11957      /* sel = ....ABCD....EFGH */
11958      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11959				 NULL_RTX, 1, OPTAB_DIRECT);
11960      /* t_1 = ........ABCD.... */
11961      sel = gen_lowpart (SImode, sel);
11962      t_1 = gen_lowpart (SImode, t_1);
11963      break;
11964
11965    default:
11966      gcc_unreachable ();
11967    }
11968
11969  /* Always perform the final addition/merge within the bmask insn.  */
11970  emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11971}
11972
11973/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
11974
11975static bool
11976sparc_frame_pointer_required (void)
11977{
11978  /* If the stack pointer is dynamically modified in the function, it cannot
11979     serve as the frame pointer.  */
11980  if (cfun->calls_alloca)
11981    return true;
11982
11983  /* If the function receives nonlocal gotos, it needs to save the frame
11984     pointer in the nonlocal_goto_save_area object.  */
11985  if (cfun->has_nonlocal_label)
11986    return true;
11987
11988  /* In flat mode, that's it.  */
11989  if (TARGET_FLAT)
11990    return false;
11991
11992  /* Otherwise, the frame pointer is required if the function isn't leaf.  */
11993  return !(crtl->is_leaf && only_leaf_regs_used ());
11994}
11995
11996/* The way this is structured, we can't eliminate SFP in favor of SP
11997   if the frame pointer is required: we want to use the SFP->HFP elimination
11998   in that case.  But the test in update_eliminables doesn't know we are
11999   assuming below that we only do the former elimination.  */
12000
12001static bool
12002sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12003{
12004  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12005}
12006
12007/* Return the hard frame pointer directly to bypass the stack bias.  */
12008
12009static rtx
12010sparc_builtin_setjmp_frame_value (void)
12011{
12012  return hard_frame_pointer_rtx;
12013}
12014
12015/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12016   they won't be allocated.  */
12017
12018static void
12019sparc_conditional_register_usage (void)
12020{
12021  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12022    {
12023      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12024      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12025    }
12026  /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
12027  /* then honor it.  */
12028  if (TARGET_ARCH32 && fixed_regs[5])
12029    fixed_regs[5] = 1;
12030  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12031    fixed_regs[5] = 0;
12032  if (! TARGET_V9)
12033    {
12034      int regno;
12035      for (regno = SPARC_FIRST_V9_FP_REG;
12036	   regno <= SPARC_LAST_V9_FP_REG;
12037	   regno++)
12038	fixed_regs[regno] = 1;
12039      /* %fcc0 is used by v8 and v9.  */
12040      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12041	   regno <= SPARC_LAST_V9_FCC_REG;
12042	   regno++)
12043	fixed_regs[regno] = 1;
12044    }
12045  if (! TARGET_FPU)
12046    {
12047      int regno;
12048      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12049	fixed_regs[regno] = 1;
12050    }
12051  /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
12052  /* then honor it.  Likewise with g3 and g4.  */
12053  if (fixed_regs[2] == 2)
12054    fixed_regs[2] = ! TARGET_APP_REGS;
12055  if (fixed_regs[3] == 2)
12056    fixed_regs[3] = ! TARGET_APP_REGS;
12057  if (TARGET_ARCH32 && fixed_regs[4] == 2)
12058    fixed_regs[4] = ! TARGET_APP_REGS;
12059  else if (TARGET_CM_EMBMEDANY)
12060    fixed_regs[4] = 1;
12061  else if (fixed_regs[4] == 2)
12062    fixed_regs[4] = 0;
12063  if (TARGET_FLAT)
12064    {
12065      int regno;
12066      /* Disable leaf functions.  */
12067      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12068      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12069	leaf_reg_remap [regno] = regno;
12070    }
12071  if (TARGET_VIS)
12072    global_regs[SPARC_GSR_REG] = 1;
12073}
12074
12075/* Implement TARGET_PREFERRED_RELOAD_CLASS:
12076
12077   - We can't load constants into FP registers.
12078   - We can't load FP constants into integer registers when soft-float,
12079     because there is no soft-float pattern with a r/F constraint.
12080   - We can't load FP constants into integer registers for TFmode unless
12081     it is 0.0L, because there is no movtf pattern with a r/F constraint.
12082   - Try and reload integer constants (symbolic or otherwise) back into
12083     registers directly, rather than having them dumped to memory.  */
12084
12085static reg_class_t
12086sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12087{
12088  machine_mode mode = GET_MODE (x);
12089  if (CONSTANT_P (x))
12090    {
12091      if (FP_REG_CLASS_P (rclass)
12092	  || rclass == GENERAL_OR_FP_REGS
12093	  || rclass == GENERAL_OR_EXTRA_FP_REGS
12094	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12095	  || (mode == TFmode && ! const_zero_operand (x, mode)))
12096	return NO_REGS;
12097
12098      if (GET_MODE_CLASS (mode) == MODE_INT)
12099	return GENERAL_REGS;
12100
12101      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12102	{
12103	  if (! FP_REG_CLASS_P (rclass)
12104	      || !(const_zero_operand (x, mode)
12105		   || const_all_ones_operand (x, mode)))
12106	    return NO_REGS;
12107	}
12108    }
12109
12110  if (TARGET_VIS3
12111      && ! TARGET_ARCH64
12112      && (rclass == EXTRA_FP_REGS
12113	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
12114    {
12115      int regno = true_regnum (x);
12116
12117      if (SPARC_INT_REG_P (regno))
12118	return (rclass == EXTRA_FP_REGS
12119		? FP_REGS : GENERAL_OR_FP_REGS);
12120    }
12121
12122  return rclass;
12123}
12124
12125/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
12126   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
12127
12128const char *
12129output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12130{
12131  char mulstr[32];
12132
12133  gcc_assert (! TARGET_ARCH64);
12134
12135  if (sparc_check_64 (operands[1], insn) <= 0)
12136    output_asm_insn ("srl\t%L1, 0, %L1", operands);
12137  if (which_alternative == 1)
12138    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12139  if (GET_CODE (operands[2]) == CONST_INT)
12140    {
12141      if (which_alternative == 1)
12142	{
12143	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
12144	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12145	  output_asm_insn (mulstr, operands);
12146	  return "srlx\t%L0, 32, %H0";
12147	}
12148      else
12149	{
12150	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
12151          output_asm_insn ("or\t%L1, %3, %3", operands);
12152          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12153	  output_asm_insn (mulstr, operands);
12154	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
12155          return "mov\t%3, %L0";
12156	}
12157    }
12158  else if (rtx_equal_p (operands[1], operands[2]))
12159    {
12160      if (which_alternative == 1)
12161	{
12162	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
12163          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12164	  output_asm_insn (mulstr, operands);
12165	  return "srlx\t%L0, 32, %H0";
12166	}
12167      else
12168	{
12169	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
12170          output_asm_insn ("or\t%L1, %3, %3", operands);
12171	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12172	  output_asm_insn (mulstr, operands);
12173	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
12174          return "mov\t%3, %L0";
12175	}
12176    }
12177  if (sparc_check_64 (operands[2], insn) <= 0)
12178    output_asm_insn ("srl\t%L2, 0, %L2", operands);
12179  if (which_alternative == 1)
12180    {
12181      output_asm_insn ("or\t%L1, %H1, %H1", operands);
12182      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12183      output_asm_insn ("or\t%L2, %L1, %L1", operands);
12184      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12185      output_asm_insn (mulstr, operands);
12186      return "srlx\t%L0, 32, %H0";
12187    }
12188  else
12189    {
12190      output_asm_insn ("sllx\t%H1, 32, %3", operands);
12191      output_asm_insn ("sllx\t%H2, 32, %4", operands);
12192      output_asm_insn ("or\t%L1, %3, %3", operands);
12193      output_asm_insn ("or\t%L2, %4, %4", operands);
12194      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12195      output_asm_insn (mulstr, operands);
12196      output_asm_insn ("srlx\t%3, 32, %H0", operands);
12197      return "mov\t%3, %L0";
12198    }
12199}
12200
12201/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
12202   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
12203   and INNER_MODE are the modes describing TARGET.  */
12204
12205static void
12206vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12207		      machine_mode inner_mode)
12208{
12209  rtx t1, final_insn, sel;
12210  int bmask;
12211
12212  t1 = gen_reg_rtx (mode);
12213
12214  elt = convert_modes (SImode, inner_mode, elt, true);
12215  emit_move_insn (gen_lowpart(SImode, t1), elt);
12216
12217  switch (mode)
12218    {
12219    case V2SImode:
12220      final_insn = gen_bshufflev2si_vis (target, t1, t1);
12221      bmask = 0x45674567;
12222      break;
12223    case V4HImode:
12224      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12225      bmask = 0x67676767;
12226      break;
12227    case V8QImode:
12228      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12229      bmask = 0x77777777;
12230      break;
12231    default:
12232      gcc_unreachable ();
12233    }
12234
12235  sel = force_reg (SImode, GEN_INT (bmask));
12236  emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12237  emit_insn (final_insn);
12238}
12239
12240/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
12241   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */
12242
12243static void
12244vector_init_fpmerge (rtx target, rtx elt)
12245{
12246  rtx t1, t2, t2_low, t3, t3_low;
12247
12248  t1 = gen_reg_rtx (V4QImode);
12249  elt = convert_modes (SImode, QImode, elt, true);
12250  emit_move_insn (gen_lowpart (SImode, t1), elt);
12251
12252  t2 = gen_reg_rtx (V8QImode);
12253  t2_low = gen_lowpart (V4QImode, t2);
12254  emit_insn (gen_fpmerge_vis (t2, t1, t1));
12255
12256  t3 = gen_reg_rtx (V8QImode);
12257  t3_low = gen_lowpart (V4QImode, t3);
12258  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12259
12260  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12261}
12262
12263/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
12264   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */
12265
12266static void
12267vector_init_faligndata (rtx target, rtx elt)
12268{
12269  rtx t1 = gen_reg_rtx (V4HImode);
12270  int i;
12271
12272  elt = convert_modes (SImode, HImode, elt, true);
12273  emit_move_insn (gen_lowpart (SImode, t1), elt);
12274
12275  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12276				  force_reg (SImode, GEN_INT (6)),
12277				  const0_rtx));
12278
12279  for (i = 0; i < 4; i++)
12280    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12281}
12282
12283/* Emit code to initialize TARGET to values for individual fields VALS.  */
12284
12285void
12286sparc_expand_vector_init (rtx target, rtx vals)
12287{
12288  const machine_mode mode = GET_MODE (target);
12289  const machine_mode inner_mode = GET_MODE_INNER (mode);
12290  const int n_elts = GET_MODE_NUNITS (mode);
12291  int i, n_var = 0;
12292  bool all_same;
12293  rtx mem;
12294
12295  all_same = true;
12296  for (i = 0; i < n_elts; i++)
12297    {
12298      rtx x = XVECEXP (vals, 0, i);
12299      if (!CONSTANT_P (x))
12300	n_var++;
12301
12302      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12303	all_same = false;
12304    }
12305
12306  if (n_var == 0)
12307    {
12308      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12309      return;
12310    }
12311
12312  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12313    {
12314      if (GET_MODE_SIZE (inner_mode) == 4)
12315	{
12316	  emit_move_insn (gen_lowpart (SImode, target),
12317			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12318	  return;
12319	}
12320      else if (GET_MODE_SIZE (inner_mode) == 8)
12321	{
12322	  emit_move_insn (gen_lowpart (DImode, target),
12323			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12324	  return;
12325	}
12326    }
12327  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12328	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12329    {
12330      emit_move_insn (gen_highpart (word_mode, target),
12331		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12332      emit_move_insn (gen_lowpart (word_mode, target),
12333		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12334      return;
12335    }
12336
12337  if (all_same && GET_MODE_SIZE (mode) == 8)
12338    {
12339      if (TARGET_VIS2)
12340	{
12341	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12342	  return;
12343	}
12344      if (mode == V8QImode)
12345	{
12346	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12347	  return;
12348	}
12349      if (mode == V4HImode)
12350	{
12351	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12352	  return;
12353	}
12354    }
12355
12356  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12357  for (i = 0; i < n_elts; i++)
12358    emit_move_insn (adjust_address_nv (mem, inner_mode,
12359				       i * GET_MODE_SIZE (inner_mode)),
12360		    XVECEXP (vals, 0, i));
12361  emit_move_insn (target, mem);
12362}
12363
12364/* Implement TARGET_SECONDARY_RELOAD.  */
12365
12366static reg_class_t
12367sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12368			machine_mode mode, secondary_reload_info *sri)
12369{
12370  enum reg_class rclass = (enum reg_class) rclass_i;
12371
12372  sri->icode = CODE_FOR_nothing;
12373  sri->extra_cost = 0;
12374
12375  /* We need a temporary when loading/storing a HImode/QImode value
12376     between memory and the FPU registers.  This can happen when combine puts
12377     a paradoxical subreg in a float/fix conversion insn.  */
12378  if (FP_REG_CLASS_P (rclass)
12379      && (mode == HImode || mode == QImode)
12380      && (GET_CODE (x) == MEM
12381	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
12382	      && true_regnum (x) == -1)))
12383    return GENERAL_REGS;
12384
12385  /* On 32-bit we need a temporary when loading/storing a DFmode value
12386     between unaligned memory and the upper FPU registers.  */
12387  if (TARGET_ARCH32
12388      && rclass == EXTRA_FP_REGS
12389      && mode == DFmode
12390      && GET_CODE (x) == MEM
12391      && ! mem_min_alignment (x, 8))
12392    return FP_REGS;
12393
12394  if (((TARGET_CM_MEDANY
12395	&& symbolic_operand (x, mode))
12396       || (TARGET_CM_EMBMEDANY
12397	   && text_segment_operand (x, mode)))
12398      && ! flag_pic)
12399    {
12400      if (in_p)
12401	sri->icode = direct_optab_handler (reload_in_optab, mode);
12402      else
12403	sri->icode = direct_optab_handler (reload_out_optab, mode);
12404      return NO_REGS;
12405    }
12406
12407  if (TARGET_VIS3 && TARGET_ARCH32)
12408    {
12409      int regno = true_regnum (x);
12410
12411      /* When using VIS3 fp<-->int register moves, on 32-bit we have
12412	 to move 8-byte values in 4-byte pieces.  This only works via
12413	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
12414	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
12415	 an FP_REGS intermediate move.  */
12416      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
12417	  || ((general_or_i64_p (rclass)
12418	       || rclass == GENERAL_OR_FP_REGS)
12419	      && SPARC_FP_REG_P (regno)))
12420	{
12421	  sri->extra_cost = 2;
12422	  return FP_REGS;
12423	}
12424    }
12425
12426  return NO_REGS;
12427}
12428
12429/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12430   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */
12431
12432bool
12433sparc_expand_conditional_move (machine_mode mode, rtx *operands)
12434{
12435  enum rtx_code rc = GET_CODE (operands[1]);
12436  machine_mode cmp_mode;
12437  rtx cc_reg, dst, cmp;
12438
12439  cmp = operands[1];
12440  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
12441    return false;
12442
12443  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
12444    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
12445
12446  cmp_mode = GET_MODE (XEXP (cmp, 0));
12447  rc = GET_CODE (cmp);
12448
12449  dst = operands[0];
12450  if (! rtx_equal_p (operands[2], dst)
12451      && ! rtx_equal_p (operands[3], dst))
12452    {
12453      if (reg_overlap_mentioned_p (dst, cmp))
12454	dst = gen_reg_rtx (mode);
12455
12456      emit_move_insn (dst, operands[3]);
12457    }
12458  else if (operands[2] == dst)
12459    {
12460      operands[2] = operands[3];
12461
12462      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
12463        rc = reverse_condition_maybe_unordered (rc);
12464      else
12465        rc = reverse_condition (rc);
12466    }
12467
12468  if (XEXP (cmp, 1) == const0_rtx
12469      && GET_CODE (XEXP (cmp, 0)) == REG
12470      && cmp_mode == DImode
12471      && v9_regcmp_p (rc))
12472    cc_reg = XEXP (cmp, 0);
12473  else
12474    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
12475
12476  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
12477
12478  emit_insn (gen_rtx_SET (VOIDmode, dst,
12479			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
12480
12481  if (dst != operands[0])
12482    emit_move_insn (operands[0], dst);
12483
12484  return true;
12485}
12486
12487/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12488   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12489   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
12490   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12491   code to be used for the condition mask.  */
12492
12493void
12494sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12495{
12496  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12497  enum rtx_code code = GET_CODE (operands[3]);
12498
12499  mask = gen_reg_rtx (Pmode);
12500  cop0 = operands[4];
12501  cop1 = operands[5];
12502  if (code == LT || code == GE)
12503    {
12504      rtx t;
12505
12506      code = swap_condition (code);
12507      t = cop0; cop0 = cop1; cop1 = t;
12508    }
12509
12510  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12511
12512  fcmp = gen_rtx_UNSPEC (Pmode,
12513			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12514			 fcode);
12515
12516  cmask = gen_rtx_UNSPEC (DImode,
12517			  gen_rtvec (2, mask, gsr),
12518			  ccode);
12519
12520  bshuf = gen_rtx_UNSPEC (mode,
12521			  gen_rtvec (3, operands[1], operands[2], gsr),
12522			  UNSPEC_BSHUFFLE);
12523
12524  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12525  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12526
12527  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12528}
12529
12530/* On sparc, any mode which naturally allocates into the float
12531   registers should return 4 here.  */
12532
12533unsigned int
12534sparc_regmode_natural_size (machine_mode mode)
12535{
12536  int size = UNITS_PER_WORD;
12537
12538  if (TARGET_ARCH64)
12539    {
12540      enum mode_class mclass = GET_MODE_CLASS (mode);
12541
12542      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12543	size = 4;
12544    }
12545
12546  return size;
12547}
12548
12549/* Return TRUE if it is a good idea to tie two pseudo registers
12550   when one has mode MODE1 and one has mode MODE2.
12551   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12552   for any hard reg, then this must be FALSE for correct output.
12553
12554   For V9 we have to deal with the fact that only the lower 32 floating
12555   point registers are 32-bit addressable.  */
12556
12557bool
12558sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12559{
12560  enum mode_class mclass1, mclass2;
12561  unsigned short size1, size2;
12562
12563  if (mode1 == mode2)
12564    return true;
12565
12566  mclass1 = GET_MODE_CLASS (mode1);
12567  mclass2 = GET_MODE_CLASS (mode2);
12568  if (mclass1 != mclass2)
12569    return false;
12570
12571  if (! TARGET_V9)
12572    return true;
12573
12574  /* Classes are the same and we are V9 so we have to deal with upper
12575     vs. lower floating point registers.  If one of the modes is a
12576     4-byte mode, and the other is not, we have to mark them as not
12577     tieable because only the lower 32 floating point register are
12578     addressable 32-bits at a time.
12579
12580     We can't just test explicitly for SFmode, otherwise we won't
12581     cover the vector mode cases properly.  */
12582
12583  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12584    return true;
12585
12586  size1 = GET_MODE_SIZE (mode1);
12587  size2 = GET_MODE_SIZE (mode2);
12588  if ((size1 > 4 && size2 == 4)
12589      || (size2 > 4 && size1 == 4))
12590    return false;
12591
12592  return true;
12593}
12594
12595/* Implement TARGET_CSTORE_MODE.  */
12596
12597static machine_mode
12598sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12599{
12600  return (TARGET_ARCH64 ? DImode : SImode);
12601}
12602
12603/* Return the compound expression made of T1 and T2.  */
12604
12605static inline tree
12606compound_expr (tree t1, tree t2)
12607{
12608  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12609}
12610
12611/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
12612
12613static void
12614sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12615{
12616  if (!TARGET_FPU)
12617    return;
12618
12619  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12620  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12621
12622  /* We generate the equivalent of feholdexcept (&fenv_var):
12623
12624       unsigned int fenv_var;
12625       __builtin_store_fsr (&fenv_var);
12626
12627       unsigned int tmp1_var;
12628       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12629
12630       __builtin_load_fsr (&tmp1_var);  */
12631
12632  tree fenv_var = create_tmp_var (unsigned_type_node);
12633  mark_addressable (fenv_var);
12634  tree fenv_addr = build_fold_addr_expr (fenv_var);
12635  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12636  tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12637
12638  tree tmp1_var = create_tmp_var (unsigned_type_node);
12639  mark_addressable (tmp1_var);
12640  tree masked_fenv_var
12641    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12642	      build_int_cst (unsigned_type_node,
12643			     ~(accrued_exception_mask | trap_enable_mask)));
12644  tree hold_mask
12645    = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12646
12647  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12648  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12649  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12650
12651  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12652
12653  /* We reload the value of tmp1_var to clear the exceptions:
12654
12655       __builtin_load_fsr (&tmp1_var);  */
12656
12657  *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12658
12659  /* We generate the equivalent of feupdateenv (&fenv_var):
12660
12661       unsigned int tmp2_var;
12662       __builtin_store_fsr (&tmp2_var);
12663
12664       __builtin_load_fsr (&fenv_var);
12665
12666       if (SPARC_LOW_FE_EXCEPT_VALUES)
12667         tmp2_var >>= 5;
12668       __atomic_feraiseexcept ((int) tmp2_var);  */
12669
12670  tree tmp2_var = create_tmp_var (unsigned_type_node);
12671  mark_addressable (tmp2_var);
12672  tree tmp3_addr = build_fold_addr_expr (tmp2_var);
12673  tree update_stfsr = build_call_expr (stfsr, 1, tmp3_addr);
12674
12675  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12676
12677  tree atomic_feraiseexcept
12678    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12679  tree update_call
12680    = build_call_expr (atomic_feraiseexcept, 1,
12681		       fold_convert (integer_type_node, tmp2_var));
12682
12683  if (SPARC_LOW_FE_EXCEPT_VALUES)
12684    {
12685      tree shifted_tmp2_var
12686	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12687		  build_int_cst (unsigned_type_node, 5));
12688      tree update_shift
12689	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12690      update_call = compound_expr (update_shift, update_call);
12691    }
12692
12693  *update
12694    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12695}
12696
12697#include "gt-sparc.h"
12698