/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
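/* Illustration (not from the original sources): if OUT_INSN sets a
   DFmode value and IN_INSN is a DFmode floating-point store, both
   modes are 8 bytes wide and the bypass applies; an SFmode result
   feeding a DFmode store has different sizes, so it does not.  */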


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
						   machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
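/* Example (illustrative): -mfixed-range=fr4-fr31 marks fr4 through
   fr31 as fixed so the compiler never allocates them.  Multiple
   comma-separated ranges may be given, as in
   -mfixed-range=fr4-fr15,fr20-fr31; each range is handled by one
   iteration of the loop above.  If every FP register ends up fixed,
   the FP unit is disabled entirely via MASK_DISABLE_FPREGS.  */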

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
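/* Illustrative usage of the TFmode builtins registered above (assuming
   an HP-UX target where HPUX_LONG_DOUBLE_LIBRARY is true; this example
   is not part of the original file):

     __float128 a = __builtin_infq ();
     __float128 b = __builtin_fabsq (a);
     __float128 c = __builtin_copysignq (b, a);

   FABSQ and COPYSIGNQ expand to library calls (_U_Qfabs and
   _U_Qfcopysign), while INFQ and HUGE_VALQ expand to a constant load;
   see pa_expand_builtin below.  */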

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
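/* Worked example (illustrative): ival = 0x12345800 has its low 11
   bits clear and bit 31 clear, so x above is 0 and a single ldil
   suffices.  For ival = 0x12345678, the bits 0x678 survive the mask
   and the value needs more than one instruction.  */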

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
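/* Worked example (illustrative): x = 0x7c (1111100 in binary) is the
   5-bit value 0b11111 sign extended and deposited at bit 2.  Above,
   lsb_mask = 4 and t = ((0x7c >> 4) + 4) & ~3 = 8, a power of two, so
   we return true.  For x = 0x101 the set bits span too wide a field:
   t = 0x11, not a power of two, so we return false.  */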

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
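/* Worked example (illustrative): mask = 0xfffffff0 has the form
   1....10....0; ~mask = 0xf, and adding its lowest set bit gives
   0x10, which has a single bit set, so we return true.  A mask such
   as 0xf0f, whose complement contains two separate runs of ones,
   leaves more than one bit set and is rejected.  */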

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
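/* Worked example (illustrative): mask = 0x0ff0 is one contiguous run
   of ones; adding its lowest set bit (0x10) yields 0x1000, a power of
   two, so depi can set all of those bits at once.  A mask with a hole,
   such as 0x0f0f, fails the test.  */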

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
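/* For illustration only (assembly recalled from HP-PA PIC conventions,
   not part of the original file), the SYMBOL_REF case above typically
   expands to something like

	addil LT'sym,%r19	; tmp_reg = %r19 + %hi(DLT offset)
	ldw RT'sym(%r1),%rN	; reg = DLT entry for sym

   i.e. the address is loaded from the linkage table slot rather than
   computed directly, which is what the HIGH/LO_SUM/UNSPEC_DLTIND14R
   RTL represents.  */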

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
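/* Worked example (illustrative): for a MODE_INT reference to
   memory (X + 0x4321), mask = 0x3fff and (0x4321 & 0x3fff) = 0x321,
   which is below (mask + 1) / 2, so Y = 0x4000.  We emit Z = X + 0x4000
   and rewrite the address as memory (Z + 0x321); the residual 0x321
   fits in a 14-bit displacement, and nearby references can share Z.  */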

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is large but divides evenly by shadd_const,
	     we can divide it and add the result to (reg).  This allows
	     more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cost 1,
   addresses involving a HIGH are cost 2, and everything else,
   including symbolic constants and PIC references, costs 4.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
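/* Illustrative cost arithmetic (not from the original file): a DImode
   multiply on a 32-bit target has GET_MODE_SIZE == 8, so factor == 2
   and the cost is 2 * 2 == 4 times the SImode multiply cost,
   reflecting the quadratic number of word-sized partial products,
   while a DImode add costs only factor == 2 word-sized adds.  */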
1557
1558/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1559   new rtx with the correct mode.  */
1560static inline rtx
1561force_mode (machine_mode mode, rtx orig)
1562{
1563  if (mode == GET_MODE (orig))
1564    return orig;
1565
1566  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1567
1568  return gen_rtx_REG (mode, REGNO (orig));
1569}
1570
1571/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1572
1573static bool
1574pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1575{
1576  return tls_referenced_p (x);
1577}
1578
1579/* Emit insns to move operands[1] into operands[0].
1580
1581   Return 1 if we have written out everything that needs to be done to
1582   do the move.  Otherwise, return 0 and the caller will emit the move
1583   normally.
1584
1585   Note SCRATCH_REG may not be in the proper mode depending on how it
1586   will be used.  This routine is responsible for creating a new copy
1587   of SCRATCH_REG in the proper mode.  */
1588
1589int
1590pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1591{
1592  register rtx operand0 = operands[0];
1593  register rtx operand1 = operands[1];
1594  register rtx tem;
1595
1596  /* We can only handle indexed addresses in the destination operand
1597     of floating point stores.  Thus, we need to break out indexed
1598     addresses from the destination operand.  */
1599  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1600    {
1601      gcc_assert (can_create_pseudo_p ());
1602
1603      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1604      operand0 = replace_equiv_address (operand0, tem);
1605    }
1606
1607  /* On targets with non-equivalent space registers, break out unscaled
1608     indexed addresses from the source operand before the final CSE.
1609     We have to do this because the REG_POINTER flag is not correctly
1610     carried through various optimization passes and CSE may substitute
1611     a pseudo without the pointer set for one with the pointer set.  As
1612     a result, we loose various opportunities to create insns with
1613     unscaled indexed addresses.  */
1614  if (!TARGET_NO_SPACE_REGS
1615      && !cse_not_expected
1616      && GET_CODE (operand1) == MEM
1617      && GET_CODE (XEXP (operand1, 0)) == PLUS
1618      && REG_P (XEXP (XEXP (operand1, 0), 0))
1619      && REG_P (XEXP (XEXP (operand1, 0), 1)))
1620    operand1
1621      = replace_equiv_address (operand1,
1622			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1623
1624  if (scratch_reg
1625      && reload_in_progress && GET_CODE (operand0) == REG
1626      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1627    operand0 = reg_equiv_mem (REGNO (operand0));
1628  else if (scratch_reg
1629	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1630	   && GET_CODE (SUBREG_REG (operand0)) == REG
1631	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1632    {
1633     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1634	the code which tracks sets/uses for delete_output_reload.  */
1635      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1636				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1637				 SUBREG_BYTE (operand0));
1638      operand0 = alter_subreg (&temp, true);
1639    }
1640
1641  if (scratch_reg
1642      && reload_in_progress && GET_CODE (operand1) == REG
1643      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1644    operand1 = reg_equiv_mem (REGNO (operand1));
1645  else if (scratch_reg
1646	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1647	   && GET_CODE (SUBREG_REG (operand1)) == REG
1648	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1649    {
1650     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1651	the code which tracks sets/uses for delete_output_reload.  */
1652      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1653				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1654				 SUBREG_BYTE (operand1));
1655      operand1 = alter_subreg (&temp, true);
1656    }
1657
1658  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1659      && ((tem = find_replacement (&XEXP (operand0, 0)))
1660	  != XEXP (operand0, 0)))
1661    operand0 = replace_equiv_address (operand0, tem);
1662
1663  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1664      && ((tem = find_replacement (&XEXP (operand1, 0)))
1665	  != XEXP (operand1, 0)))
1666    operand1 = replace_equiv_address (operand1, tem);
1667
1668  /* Handle secondary reloads for loads/stores of FP registers from
1669     REG+D addresses where D does not fit in 5 or 14 bits, including
1670     (subreg (mem (addr))) cases, and reloads for other unsupported
1671     memory operands.  */
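  /* As an illustrative walk-through (the base register and displacement
     here are hypothetical): reloading an FP register from 40000(%r3)
     cannot use the REG+D form since 40000 does not fit in 14 bits, so
     the code below loads the displacement into SCRATCH_REG, adds in the
     base register, and rewrites the MEM to address through the scratch.  */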
1672  if (scratch_reg
1673      && FP_REG_P (operand0)
1674      && (MEM_P (operand1)
1675	  || (GET_CODE (operand1) == SUBREG
1676	      && MEM_P (XEXP (operand1, 0)))))
1677    {
1678      rtx op1 = operand1;
1679
1680      if (GET_CODE (op1) == SUBREG)
1681	op1 = XEXP (op1, 0);
1682
1683      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1684	{
1685	  if (!(TARGET_PA_20
1686		&& !TARGET_ELF32
1687		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1688	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1689	    {
1690	      /* SCRATCH_REG will hold an address and maybe the actual data.
1691		 We want it in WORD_MODE regardless of what mode it was
1692		 originally given to us.  */
1693	      scratch_reg = force_mode (word_mode, scratch_reg);
1694
1695	      /* D might not fit in 14 bits either; for such cases load D
1696		 into scratch reg.  */
1697	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1698		{
1699		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1700		  emit_move_insn (scratch_reg,
1701				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1702						  Pmode,
1703						  XEXP (XEXP (op1, 0), 0),
1704						  scratch_reg));
1705		}
1706	      else
1707		emit_move_insn (scratch_reg, XEXP (op1, 0));
1708	      emit_insn (gen_rtx_SET (VOIDmode, operand0,
1709				  replace_equiv_address (op1, scratch_reg)));
1710	      return 1;
1711	    }
1712	}
1713      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1714	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1715	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1716	{
1717	  /* Load memory address into SCRATCH_REG.  */
1718	  scratch_reg = force_mode (word_mode, scratch_reg);
1719	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1720	  emit_insn (gen_rtx_SET (VOIDmode, operand0,
1721				  replace_equiv_address (op1, scratch_reg)));
1722	  return 1;
1723	}
1724    }
1725  else if (scratch_reg
1726	   && FP_REG_P (operand1)
1727	   && (MEM_P (operand0)
1728	       || (GET_CODE (operand0) == SUBREG
1729		   && MEM_P (XEXP (operand0, 0)))))
1730    {
1731      rtx op0 = operand0;
1732
1733      if (GET_CODE (op0) == SUBREG)
1734	op0 = XEXP (op0, 0);
1735
1736      if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1737	{
1738	  if (!(TARGET_PA_20
1739		&& !TARGET_ELF32
1740		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1741	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1742	    {
1743	      /* SCRATCH_REG will hold an address and maybe the actual data.
1744		 We want it in WORD_MODE regardless of what mode it was
1745		 originally given to us.  */
1746	      scratch_reg = force_mode (word_mode, scratch_reg);
1747
1748	      /* D might not fit in 14 bits either; for such cases load D
1749		 into scratch reg.  */
1750	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1751		{
1752		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1753		  emit_move_insn (scratch_reg,
1754				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1755						  Pmode,
1756						  XEXP (XEXP (op0, 0), 0),
1757						  scratch_reg));
1758		}
1759	      else
1760		emit_move_insn (scratch_reg, XEXP (op0, 0));
1761	      emit_insn (gen_rtx_SET (VOIDmode,
1762				      replace_equiv_address (op0, scratch_reg),
1763				      operand1));
1764	      return 1;
1765	    }
1766	}
1767      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1768	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1769	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1770	{
1771	  /* Load memory address into SCRATCH_REG.  */
1772	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1773	  emit_insn (gen_rtx_SET (VOIDmode,
1774				  replace_equiv_address (op0, scratch_reg),
1775				  operand1));
1776	  return 1;
1777	}
1778    }
1779  /* Handle secondary reloads for loads of FP registers from constant
1780     expressions by forcing the constant into memory.  For the most part,
1781     this is only necessary for SImode and DImode.
1782
1783     Use scratch_reg to hold the address of the memory location.  */
1784  else if (scratch_reg
1785	   && CONSTANT_P (operand1)
1786	   && FP_REG_P (operand0))
1787    {
1788      rtx const_mem, xoperands[2];
1789
1790      if (operand1 == CONST0_RTX (mode))
1791	{
1792	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1793	  return 1;
1794	}
1795
1796      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1797	 it in WORD_MODE regardless of what mode it was originally given
1798	 to us.  */
1799      scratch_reg = force_mode (word_mode, scratch_reg);
1800
1801      /* Force the constant into memory and put the address of the
1802	 memory location into scratch_reg.  */
1803      const_mem = force_const_mem (mode, operand1);
1804      xoperands[0] = scratch_reg;
1805      xoperands[1] = XEXP (const_mem, 0);
1806      pa_emit_move_sequence (xoperands, Pmode, 0);
1807
1808      /* Now load the destination register.  */
1809      emit_insn (gen_rtx_SET (mode, operand0,
1810			      replace_equiv_address (const_mem, scratch_reg)));
1811      return 1;
1812    }
1813  /* Handle secondary reloads for SAR.  These occur when trying to load
1814     the SAR from memory or a constant.  */
1815  else if (scratch_reg
1816	   && GET_CODE (operand0) == REG
1817	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1818	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1819	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1820    {
1821      /* D might not fit in 14 bits either; for such cases load D into
1822	 scratch reg.  */
1823      if (GET_CODE (operand1) == MEM
1824	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1825	{
1826	  /* We are reloading the address into the scratch register, so we
1827	     want to make sure the scratch register is a full register.  */
1828	  scratch_reg = force_mode (word_mode, scratch_reg);
1829
1830	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1831	  emit_move_insn (scratch_reg,
1832			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1833					  Pmode,
1834					  XEXP (XEXP (operand1, 0), 0),
1835					  scratch_reg));
1836
1837	  /* Now we are going to load the scratch register from memory;
1838	     we want to load it in the same width as the original MEM,
1839	     which must be the same as the width of the ultimate destination,
1840	     OPERAND0.  */
1841	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1842
1843	  emit_move_insn (scratch_reg,
1844			  replace_equiv_address (operand1, scratch_reg));
1845	}
1846      else
1847	{
1848	  /* We want to load the scratch register using the same mode as
1849	     the ultimate destination.  */
1850	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1851
1852	  emit_move_insn (scratch_reg, operand1);
1853	}
1854
1855      /* And emit the insn to set the ultimate destination.  We know that
1856	 the scratch register has the same mode as the destination at this
1857	 point.  */
1858      emit_move_insn (operand0, scratch_reg);
1859      return 1;
1860    }
1861
1862  /* Handle the most common case: storing into a register.  */
1863  if (register_operand (operand0, mode))
1864    {
1865      /* Legitimize TLS symbol references.  This happens for references
1866	 that aren't legitimate constants.  */
1867      if (PA_SYMBOL_REF_TLS_P (operand1))
1868	operand1 = legitimize_tls_address (operand1);
1869
1870      if (register_operand (operand1, mode)
1871	  || (GET_CODE (operand1) == CONST_INT
1872	      && pa_cint_ok_for_move (INTVAL (operand1)))
1873	  || (operand1 == CONST0_RTX (mode))
1874	  || (GET_CODE (operand1) == HIGH
1875	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1876	  /* Only `general_operands' can come here, so MEM is ok.  */
1877	  || GET_CODE (operand1) == MEM)
1878	{
1879	  /* Various sets are created during RTL generation which don't
1880	     have the REG_POINTER flag correctly set.  After the CSE pass,
1881	     instruction recognition can fail if we don't consistently
1882	     set this flag when performing register copies.  This should
1883	     also improve the opportunities for creating insns that use
1884	     unscaled indexing.  */
1885	  if (REG_P (operand0) && REG_P (operand1))
1886	    {
1887	      if (REG_POINTER (operand1)
1888		  && !REG_POINTER (operand0)
1889		  && !HARD_REGISTER_P (operand0))
1890		copy_reg_pointer (operand0, operand1);
1891	    }
1892
1893	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1894	     get set.  In some cases, we can set the REG_POINTER flag
1895	     from the declaration for the MEM.  */
1896	  if (REG_P (operand0)
1897	      && GET_CODE (operand1) == MEM
1898	      && !REG_POINTER (operand0))
1899	    {
1900	      tree decl = MEM_EXPR (operand1);
1901
1902	      /* Set the register pointer flag and register alignment
1903		 if the declaration for this memory reference is a
1904		 pointer type.  */
1905	      if (decl)
1906		{
1907		  tree type;
1908
1909		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1910		     tree operand 1.  */
1911		  if (TREE_CODE (decl) == COMPONENT_REF)
1912		    decl = TREE_OPERAND (decl, 1);
1913
1914		  type = TREE_TYPE (decl);
1915		  type = strip_array_types (type);
1916
1917		  if (POINTER_TYPE_P (type))
1918		    {
1919		      int align;
1920
1921		      type = TREE_TYPE (type);
1922	      /* Using TYPE_ALIGN_OK is rather conservative as
1923		 only the Ada front end actually sets it.  */
1924		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1925			       : BITS_PER_UNIT);
1926		      mark_reg_pointer (operand0, align);
1927		    }
1928		}
1929	    }
1930
1931	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1932	  return 1;
1933	}
1934    }
1935  else if (GET_CODE (operand0) == MEM)
1936    {
1937      if (mode == DFmode && operand1 == CONST0_RTX (mode)
1938	  && !(reload_in_progress || reload_completed))
1939	{
1940	  rtx temp = gen_reg_rtx (DFmode);
1941
1942	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1943	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1944	  return 1;
1945	}
1946      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1947	{
1948	  /* Run this case quickly.  */
1949	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1950	  return 1;
1951	}
1952      if (! (reload_in_progress || reload_completed))
1953	{
1954	  operands[0] = validize_mem (operand0);
1955	  operands[1] = operand1 = force_reg (mode, operand1);
1956	}
1957    }
1958
1959  /* Simplify the source if we need to.
1960     Note we do have to handle function labels here, even though we do
1961     not consider them legitimate constants.  Loop optimizations can
1962     call the emit_move_xxx routines with one as a source.  */
1963  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1964      || (GET_CODE (operand1) == HIGH
1965	  && symbolic_operand (XEXP (operand1, 0), mode))
1966      || function_label_operand (operand1, VOIDmode)
1967      || tls_referenced_p (operand1))
1968    {
1969      int ishighonly = 0;
1970
1971      if (GET_CODE (operand1) == HIGH)
1972	{
1973	  ishighonly = 1;
1974	  operand1 = XEXP (operand1, 0);
1975	}
1976      if (symbolic_operand (operand1, mode))
1977	{
1978	  /* Argh.  The assembler and linker can't handle arithmetic
1979	     involving plabels.
1980
1981	     So we force the plabel into memory, load operand0 from
1982	     the memory location, then add in the constant part.  */
1983	  if ((GET_CODE (operand1) == CONST
1984	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1985	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1986					  VOIDmode))
1987	      || function_label_operand (operand1, VOIDmode))
1988	    {
1989	      rtx temp, const_part;
1990
1991	      /* Figure out what (if any) scratch register to use.  */
1992	      if (reload_in_progress || reload_completed)
1993		{
1994		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1995		  /* SCRATCH_REG will hold an address and maybe the actual
1996		     data.  We want it in WORD_MODE regardless of what mode it
1997		     was originally given to us.  */
1998		  scratch_reg = force_mode (word_mode, scratch_reg);
1999		}
2000	      else if (flag_pic)
2001		scratch_reg = gen_reg_rtx (Pmode);
2002
2003	      if (GET_CODE (operand1) == CONST)
2004		{
2005		  /* Save away the constant part of the expression.  */
2006		  const_part = XEXP (XEXP (operand1, 0), 1);
2007		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2008
2009		  /* Force the function label into memory.  */
2010		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2011		}
2012	      else
2013		{
2014		  /* No constant part.  */
2015		  const_part = NULL_RTX;
2016
2017		  /* Force the function label into memory.  */
2018		  temp = force_const_mem (mode, operand1);
2019		}
2020
2022	      /* Get the address of the memory location.  PIC-ify it if
2023		 necessary.  */
2024	      temp = XEXP (temp, 0);
2025	      if (flag_pic)
2026		temp = legitimize_pic_address (temp, mode, scratch_reg);
2027
2028	      /* Put the address of the memory location into our destination
2029		 register.  */
2030	      operands[1] = temp;
2031	      pa_emit_move_sequence (operands, mode, scratch_reg);
2032
2033	      /* Now load from the memory location into our destination
2034		 register.  */
2035	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2036	      pa_emit_move_sequence (operands, mode, scratch_reg);
2037
2038	      /* And add back in the constant part.  */
2039	      if (const_part != NULL_RTX)
2040		expand_inc (operand0, const_part);
2041
2042	      return 1;
2043	    }
2044
2045	  if (flag_pic)
2046	    {
2047	      rtx_insn *insn;
2048	      rtx temp;
2049
2050	      if (reload_in_progress || reload_completed)
2051		{
2052		  temp = scratch_reg ? scratch_reg : operand0;
2053		  /* TEMP will hold an address and maybe the actual
2054		     data.  We want it in WORD_MODE regardless of what mode it
2055		     was originally given to us.  */
2056		  temp = force_mode (word_mode, temp);
2057		}
2058	      else
2059		temp = gen_reg_rtx (Pmode);
2060
2061	      /* Force (const (plus (symbol) (const_int))) to memory
2062	         if the const_int will not fit in 14 bits.  Although
2063		 this requires a relocation, the instruction sequence
2064		 needed to load the value is shorter.  */
2065	      if (GET_CODE (operand1) == CONST
2066		  && GET_CODE (XEXP (operand1, 0)) == PLUS
2067		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2068		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2069		{
2070		  rtx x, m = force_const_mem (mode, operand1);
2071
2072		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2073		  x = replace_equiv_address (m, x);
2074		  insn = emit_move_insn (operand0, x);
2075		}
2076	      else
2077		{
2078		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2079		  if (REG_P (operand0) && REG_P (operands[1]))
2080		    copy_reg_pointer (operand0, operands[1]);
2081		  insn = emit_move_insn (operand0, operands[1]);
2082		}
2083
2084	      /* Put a REG_EQUAL note on this insn.  */
2085	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2086	    }
2087	  /* On the HPPA, references to data space are supposed to use dp,
2088	     register 27, but showing it in the RTL inhibits various cse
2089	     and loop optimizations.  */
2090	  else
2091	    {
2092	      rtx temp, set;
2093
2094	      if (reload_in_progress || reload_completed)
2095		{
2096		  temp = scratch_reg ? scratch_reg : operand0;
2097		  /* TEMP will hold an address and maybe the actual
2098		     data.  We want it in WORD_MODE regardless of what mode it
2099		     was originally given to us.  */
2100		  temp = force_mode (word_mode, temp);
2101		}
2102	      else
2103		temp = gen_reg_rtx (mode);
2104
2105	      /* Loading a SYMBOL_REF into a register makes that register
2106		 safe to be used as the base in an indexed address.
2107
2108		 Don't mark hard registers though.  That loses.  */
2109	      if (GET_CODE (operand0) == REG
2110		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2111		mark_reg_pointer (operand0, BITS_PER_UNIT);
2112	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2113		mark_reg_pointer (temp, BITS_PER_UNIT);
2114
2115	      if (ishighonly)
2116		set = gen_rtx_SET (mode, operand0, temp);
2117	      else
2118		set = gen_rtx_SET (VOIDmode,
2119				   operand0,
2120				   gen_rtx_LO_SUM (mode, temp, operand1));
2121
2122	      emit_insn (gen_rtx_SET (VOIDmode,
2123				      temp,
2124				      gen_rtx_HIGH (mode, operand1)));
2125	      emit_insn (set);
2126
2127	    }
2128	  return 1;
2129	}
2130      else if (tls_referenced_p (operand1))
2131	{
2132	  rtx tmp = operand1;
2133	  rtx addend = NULL;
2134
2135	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2136	    {
2137	      addend = XEXP (XEXP (tmp, 0), 1);
2138	      tmp = XEXP (XEXP (tmp, 0), 0);
2139	    }
2140
2141	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2142	  tmp = legitimize_tls_address (tmp);
2143	  if (addend)
2144	    {
2145	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2146	      tmp = force_operand (tmp, operands[0]);
2147	    }
2148	  operands[1] = tmp;
2149	}
2150      else if (GET_CODE (operand1) != CONST_INT
2151	       || !pa_cint_ok_for_move (INTVAL (operand1)))
2152	{
2153	  rtx temp;
2154	  rtx_insn *insn;
2155	  rtx op1 = operand1;
2156	  HOST_WIDE_INT value = 0;
2157	  HOST_WIDE_INT insv = 0;
2158	  int insert = 0;
2159
2160	  if (GET_CODE (operand1) == CONST_INT)
2161	    value = INTVAL (operand1);
2162
2163	  if (TARGET_64BIT
2164	      && GET_CODE (operand1) == CONST_INT
2165	      && HOST_BITS_PER_WIDE_INT > 32
2166	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2167	    {
2168	      HOST_WIDE_INT nval;
2169
2170	      /* Extract the low order 32 bits of the value and sign extend.
2171		 If the new value is the same as the original value, we can
2172		 use the original value as-is.  If the new value is
2173		 different, we use it and insert the most significant 32 bits
2174		 of the original value into the final result.  */
2175	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2176		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
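	      /* The XOR/subtract pair sign-extends bit 31 of the low
		 word: e.g. (illustrative values) 0x7fffffff is already
		 its own 32-bit sign extension and needs no insertion,
		 while 0x100000000 yields nval 0, so its upper 32 bits
		 (here, 1) must be inserted separately below.  */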
2177	      if (value != nval)
2178		{
2179#if HOST_BITS_PER_WIDE_INT > 32
2180		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2181#endif
2182		  insert = 1;
2183		  value = nval;
2184		  operand1 = GEN_INT (nval);
2185		}
2186	    }
2187
2188	  if (reload_in_progress || reload_completed)
2189	    temp = scratch_reg ? scratch_reg : operand0;
2190	  else
2191	    temp = gen_reg_rtx (mode);
2192
2193	  /* We don't directly split DImode constants on 32-bit targets
2194	     because PLUS uses an 11-bit immediate and the insn sequence
2195	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2196	  if (GET_CODE (operand1) == CONST_INT
2197	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2198	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2199	      && !insert)
2200	    {
2201	      /* Directly break constant into high and low parts.  This
2202		 provides better optimization opportunities because various
2203		 passes recognize constants split with PLUS but not LO_SUM.
2204		 We use a 14-bit signed low part except when the addition
2205		 of 0x4000 to the high part might change the sign of the
2206		 high part.  */
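	      /* Illustrative splits: 0x12345678 gives high = 0x12344000
		 and low = 0x1678, while 0x12343fff has low 0x3fff >=
		 0x2000, so high is bumped to 0x12344000 and low becomes
		 -1, keeping the low part within ldo's 14-bit range.  */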
2207	      HOST_WIDE_INT low = value & 0x3fff;
2208	      HOST_WIDE_INT high = value & ~ 0x3fff;
2209
2210	      if (low >= 0x2000)
2211		{
2212		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2213		    high += 0x2000;
2214		  else
2215		    high += 0x4000;
2216		}
2217
2218	      low = value - high;
2219
2220	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2221	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2222	    }
2223	  else
2224	    {
2225	      emit_insn (gen_rtx_SET (VOIDmode, temp,
2226				      gen_rtx_HIGH (mode, operand1)));
2227	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2228	    }
2229
2230	  insn = emit_move_insn (operands[0], operands[1]);
2231
2232	  /* Now insert the most significant 32 bits of the value
2233	     into the register.  When we don't have a second register
2234	     available, it could take up to nine instructions to load
2235	     a 64-bit integer constant.  Prior to reload, we force
2236	     constants that would take more than three instructions
2237	     to load to the constant pool.  During and after reload,
2238	     we have to handle all possible values.  */
2239	  if (insert)
2240	    {
2241	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2242		 register and the value to be inserted is outside the
2243		 range that can be loaded with three depdi instructions.  */
2244	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2245		{
2246		  operand1 = GEN_INT (insv);
2247
2248		  emit_insn (gen_rtx_SET (VOIDmode, temp,
2249					  gen_rtx_HIGH (mode, operand1)));
2250		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2251		  if (mode == DImode)
2252		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2253						  const0_rtx, temp));
2254		  else
2255		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2256						  const0_rtx, temp));
2257		}
2258	      else
2259		{
2260		  int len = 5, pos = 27;
2261
2262		  /* Insert the bits using the depdi instruction.  */
2263		  while (pos >= 0)
2264		    {
2265		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2266		      HOST_WIDE_INT sign = v5 < 0;
2267
2268		      /* Left extend the insertion.  */
2269		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2270		      while (pos > 0 && (insv & 1) == sign)
2271			{
2272			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2273			  len += 1;
2274			  pos -= 1;
2275			}
2276
2277		      if (mode == DImode)
2278			insn = emit_insn (gen_insvdi (operand0,
2279						      GEN_INT (len),
2280						      GEN_INT (pos),
2281						      GEN_INT (v5)));
2282		      else
2283			insn = emit_insn (gen_insvsi (operand0,
2284						      GEN_INT (len),
2285						      GEN_INT (pos),
2286						      GEN_INT (v5)));
2287
2288		      len = pos > 0 && pos < 5 ? pos : 5;
2289		      pos -= len;
2290		    }
2291		}
2292	    }
2293
2294	  set_unique_reg_note (insn, REG_EQUAL, op1);
2295
2296	  return 1;
2297	}
2298    }
2299  /* Now have insn-emit do whatever it normally does.  */
2300  return 0;
2301}
2302
2303/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2304   it will need a link/runtime reloc).  */
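/* For example (an illustrative initializer), &foo or &buf[4] contains an
   ADDR_EXPR and needs a reloc, while arithmetic on plain integer
   constants does not.  */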
2305
2306int
2307pa_reloc_needed (tree exp)
2308{
2309  int reloc = 0;
2310
2311  switch (TREE_CODE (exp))
2312    {
2313    case ADDR_EXPR:
2314      return 1;
2315
2316    case POINTER_PLUS_EXPR:
2317    case PLUS_EXPR:
2318    case MINUS_EXPR:
2319      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2320      reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2321      break;
2322
2323    CASE_CONVERT:
2324    case NON_LVALUE_EXPR:
2325      reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2326      break;
2327
2328    case CONSTRUCTOR:
2329      {
2330	tree value;
2331	unsigned HOST_WIDE_INT ix;
2332
2333	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2334	  if (value)
2335	    reloc |= pa_reloc_needed (value);
2336      }
2337      break;
2338
2339    case ERROR_MARK:
2340      break;
2341
2342    default:
2343      break;
2344    }
2345  return reloc;
2346}
2347
2348
2349/* Return the best assembler insn template
2350   for moving operands[1] into operands[0] as a fullword.  */
2351const char *
2352pa_singlemove_string (rtx *operands)
2353{
2354  HOST_WIDE_INT intval;
2355
2356  if (GET_CODE (operands[0]) == MEM)
2357    return "stw %r1,%0";
2358  if (GET_CODE (operands[1]) == MEM)
2359    return "ldw %1,%0";
2360  if (GET_CODE (operands[1]) == CONST_DOUBLE)
2361    {
2362      long i;
2363      REAL_VALUE_TYPE d;
2364
2365      gcc_assert (GET_MODE (operands[1]) == SFmode);
2366
2367      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2368	 bit pattern.  */
2369      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2370      REAL_VALUE_TO_TARGET_SINGLE (d, i);
2371
2372      operands[1] = GEN_INT (i);
2373      /* Fall through to CONST_INT case.  */
2374    }
2375  if (GET_CODE (operands[1]) == CONST_INT)
2376    {
2377      intval = INTVAL (operands[1]);
2378
2379      if (VAL_14_BITS_P (intval))
2380	return "ldi %1,%0";
2381      else if ((intval & 0x7ff) == 0)
2382	return "ldil L'%1,%0";
2383      else if (pa_zdepi_cint_p (intval))
2384	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2385      else
2386	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2387    }
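  /* Illustrative choices among the templates above: 5 fits in 14 bits
     (ldi); 0x12000 has its low 11 bits clear (ldil); 0x00ff0000 is a
     depositable bit string (zdepi/depwi,z); 0x12345 needs the two-insn
     ldil/ldo sequence.  */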
2388  return "copy %1,%0";
2389}
2390
2391
2392/* Compute the position (in OP[1]) and width (in OP[2]) needed to copy
2393   IMM to a register using the zdepi instruction.  Store the immediate
2394   value to insert in OP[0].  */
2395static void
2396compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2397{
2398  int lsb, len;
2399
2400  /* Find the least significant set bit in IMM.  */
2401  for (lsb = 0; lsb < 32; lsb++)
2402    {
2403      if ((imm & 1) != 0)
2404        break;
2405      imm >>= 1;
2406    }
2407
2408  /* Choose variants based on *sign* of the 5-bit field.  */
2409  if ((imm & 0x10) == 0)
2410    len = (lsb <= 28) ? 4 : 32 - lsb;
2411  else
2412    {
2413      /* Find the width of the bitstring in IMM.  */
2414      for (len = 5; len < 32 - lsb; len++)
2415	{
2416	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2417	    break;
2418	}
2419
2420      /* Sign extend IMM as a 5-bit value.  */
2421      imm = (imm & 0xf) - 0x10;
2422    }
2423
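  /* E.g. (illustrative), IMM = 0x00ff0000: lsb is 16, the low five bits
     of the shifted value have the sign bit set, and the run of ones is
     8 bits wide, so we store op[0] = -1, op[1] = 15, op[2] = 8.  */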
2424  op[0] = imm;
2425  op[1] = 31 - lsb;
2426  op[2] = len;
2427}
2428
2429/* Compute the position (in OP[1]) and width (in OP[2]) needed to copy
2430   IMM to a register using the depdi,z instruction.  Store the immediate
2431   value to insert in OP[0].  */
2432
2433static void
2434compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2435{
2436  int lsb, len, maxlen;
2437
2438  maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2439
2440  /* Find the least significant set bit in IMM.  */
2441  for (lsb = 0; lsb < maxlen; lsb++)
2442    {
2443      if ((imm & 1) != 0)
2444        break;
2445      imm >>= 1;
2446    }
2447
2448  /* Choose variants based on *sign* of the 5-bit field.  */
2449  if ((imm & 0x10) == 0)
2450    len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2451  else
2452    {
2453      /* Find the width of the bitstring in IMM.  */
2454      for (len = 5; len < maxlen - lsb; len++)
2455	{
2456	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2457	    break;
2458	}
2459
2460      /* Extend length if host is narrow and IMM is negative.  */
2461      if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2462	len += 32;
2463
2464      /* Sign extend IMM as a 5-bit value.  */
2465      imm = (imm & 0xf) - 0x10;
2466    }
2467
2468  op[0] = imm;
2469  op[1] = 63 - lsb;
2470  op[2] = len;
2471}
2472
2473/* Output assembler code to perform a doubleword move insn
2474   with operands OPERANDS.  */
2475
2476const char *
2477pa_output_move_double (rtx *operands)
2478{
2479  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2480  rtx latehalf[2];
2481  rtx addreg0 = 0, addreg1 = 0;
2482  int highonly = 0;
2483
2484  /* First classify both operands.  */
2485
2486  if (REG_P (operands[0]))
2487    optype0 = REGOP;
2488  else if (offsettable_memref_p (operands[0]))
2489    optype0 = OFFSOP;
2490  else if (GET_CODE (operands[0]) == MEM)
2491    optype0 = MEMOP;
2492  else
2493    optype0 = RNDOP;
2494
2495  if (REG_P (operands[1]))
2496    optype1 = REGOP;
2497  else if (CONSTANT_P (operands[1]))
2498    optype1 = CNSTOP;
2499  else if (offsettable_memref_p (operands[1]))
2500    optype1 = OFFSOP;
2501  else if (GET_CODE (operands[1]) == MEM)
2502    optype1 = MEMOP;
2503  else
2504    optype1 = RNDOP;
2505
2506  /* Check for cases that the operand constraints are not supposed
2507     to allow.  */
2508  gcc_assert (optype0 == REGOP || optype1 == REGOP);
2509
2510  /* Handle copies between general and floating registers.  */
2511
2512  if (optype0 == REGOP && optype1 == REGOP
2513      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2514    {
2515      if (FP_REG_P (operands[0]))
2516	{
2517	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2518	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2519	  return "{fldds|fldd} -16(%%sp),%0";
2520	}
2521      else
2522	{
2523	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2524	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2525	  return "{ldws|ldw} -12(%%sp),%R0";
2526	}
2527    }
2528
2529  /* Handle auto decrementing and incrementing loads and stores
2530     specifically, since the structure of the function doesn't work
2531     for them without major modification.  Do it better when we teach
2532     this port about the general inc/dec addressing of PA.
2533     (This was written by tege.  Chide him if it doesn't work.)  */
2534
2535  if (optype0 == MEMOP)
2536    {
2537      /* We have to output the address syntax ourselves, since print_operand
2538	 doesn't deal with the addresses we want to use.  Fix this later.  */
2539
2540      rtx addr = XEXP (operands[0], 0);
2541      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2542	{
2543	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2544
2545	  operands[0] = XEXP (addr, 0);
2546	  gcc_assert (GET_CODE (operands[1]) == REG
2547		      && GET_CODE (operands[0]) == REG);
2548
2549	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2550
2551	  /* No overlap between high target register and address
2552	     register.  (We do this in a non-obvious way to
2553	     save a register file writeback)  */
2554	  if (GET_CODE (addr) == POST_INC)
2555	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2556	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2557	}
2558      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2559	{
2560	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2561
2562	  operands[0] = XEXP (addr, 0);
2563	  gcc_assert (GET_CODE (operands[1]) == REG
2564		      && GET_CODE (operands[0]) == REG);
2565
2566	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2567	  /* No overlap between high target register and address
2568	     register.  (We do this in a non-obvious way to save a
2569	     register file writeback)  */
2570	  if (GET_CODE (addr) == PRE_INC)
2571	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2572	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2573	}
2574    }
2575  if (optype1 == MEMOP)
2576    {
2577      /* We have to output the address syntax ourselves, since print_operand
2578	 doesn't deal with the addresses we want to use.  Fix this later.  */
2579
2580      rtx addr = XEXP (operands[1], 0);
2581      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2582	{
2583	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2584
2585	  operands[1] = XEXP (addr, 0);
2586	  gcc_assert (GET_CODE (operands[0]) == REG
2587		      && GET_CODE (operands[1]) == REG);
2588
2589	  if (!reg_overlap_mentioned_p (high_reg, addr))
2590	    {
2591	      /* No overlap between high target register and address
2592		 register.  (We do this in a non-obvious way to
2593		 save a register file writeback)  */
2594	      if (GET_CODE (addr) == POST_INC)
2595		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2596	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2597	    }
2598	  else
2599	    {
2600	      /* This is an undefined situation.  We should load into the
2601		 address register *and* update that register.  Probably
2602		 we don't need to handle this at all.  */
2603	      if (GET_CODE (addr) == POST_INC)
2604		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2605	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2606	    }
2607	}
2608      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2609	{
2610	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2611
2612	  operands[1] = XEXP (addr, 0);
2613	  gcc_assert (GET_CODE (operands[0]) == REG
2614		      && GET_CODE (operands[1]) == REG);
2615
2616	  if (!reg_overlap_mentioned_p (high_reg, addr))
2617	    {
2618	      /* No overlap between high target register and address
2619		 register.  (We do this in a non-obvious way to
2620		 save a register file writeback)  */
2621	      if (GET_CODE (addr) == PRE_INC)
2622		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2623	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2624	    }
2625	  else
2626	    {
2627	      /* This is an undefined situation.  We should load into the
2628		 address register *and* update that register.  Probably
2629		 we don't need to handle this at all.  */
2630	      if (GET_CODE (addr) == PRE_INC)
2631		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2632	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2633	    }
2634	}
2635      else if (GET_CODE (addr) == PLUS
2636	       && GET_CODE (XEXP (addr, 0)) == MULT)
2637	{
2638	  rtx xoperands[4];
2639
2640	  /* Load address into left half of destination register.  */
2641	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2642	  xoperands[1] = XEXP (addr, 1);
2643	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2644	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2645	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2646			   xoperands);
2647	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2648	}
2649      else if (GET_CODE (addr) == PLUS
2650	       && REG_P (XEXP (addr, 0))
2651	       && REG_P (XEXP (addr, 1)))
2652	{
2653	  rtx xoperands[3];
2654
2655	  /* Load address into left half of destination register.  */
2656	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2657	  xoperands[1] = XEXP (addr, 0);
2658	  xoperands[2] = XEXP (addr, 1);
2659	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2660			   xoperands);
2661	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2662	}
2663    }
2664
2665  /* If an operand is an unoffsettable memory ref, find a register
2666     we can increment temporarily to make it refer to the second word.  */
2667
2668  if (optype0 == MEMOP)
2669    addreg0 = find_addr_reg (XEXP (operands[0], 0));
2670
2671  if (optype1 == MEMOP)
2672    addreg1 = find_addr_reg (XEXP (operands[1], 0));
2673
2674  /* Ok, we can do one word at a time.
2675     Normally we do the low-numbered word first.
2676
2677     Set up in LATEHALF the operands to use for the high-numbered
2678     word and in some cases alter the operands in OPERANDS to be
2679     suitable for the low-numbered word.  */
2680
2681  if (optype0 == REGOP)
2682    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2683  else if (optype0 == OFFSOP)
2684    latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2685  else
2686    latehalf[0] = operands[0];
2687
2688  if (optype1 == REGOP)
2689    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2690  else if (optype1 == OFFSOP)
2691    latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2692  else if (optype1 == CNSTOP)
2693    {
2694      if (GET_CODE (operands[1]) == HIGH)
2695	{
2696	  operands[1] = XEXP (operands[1], 0);
2697	  highonly = 1;
2698	}
2699      split_double (operands[1], &operands[1], &latehalf[1]);
2700    }
2701  else
2702    latehalf[1] = operands[1];
2703
2704  /* If the first move would clobber the source of the second one,
2705     do them in the other order.
2706
2707     This can happen in two cases:
2708
2709	mem -> register where the first half of the destination register
2710	is the same register used in the memory's address.  Reload
2711	can create such insns.
2712
2713	mem in this case will be either register indirect or register
2714	indirect plus a valid offset.
2715
2716	register -> register move where REGNO(dst) == REGNO(src + 1);
2717	someone (Tim/Tege?) claimed this can happen for parameter loads.
2718
2719     Handle mem -> register case first.  */
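  /* E.g. (illustrative operands), for (set (reg:DI 4) (mem:DI (reg 4)))
     moving the low word first would clobber the address register, so
     the high word at offset 4 is moved first below.  */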
2720  if (optype0 == REGOP
2721      && (optype1 == MEMOP || optype1 == OFFSOP)
2722      && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2723    {
2724      /* Do the late half first.  */
2725      if (addreg1)
2726	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2727      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2728
2729      /* Then clobber.  */
2730      if (addreg1)
2731	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2732      return pa_singlemove_string (operands);
2733    }
2734
2735  /* Now handle register -> register case.  */
2736  if (optype0 == REGOP && optype1 == REGOP
2737      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2738    {
2739      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2740      return pa_singlemove_string (operands);
2741    }
2742
2743  /* Normal case: do the two words, low-numbered first.  */
2744
2745  output_asm_insn (pa_singlemove_string (operands), operands);
2746
2747  /* Make any unoffsettable addresses point at high-numbered word.  */
2748  if (addreg0)
2749    output_asm_insn ("ldo 4(%0),%0", &addreg0);
2750  if (addreg1)
2751    output_asm_insn ("ldo 4(%0),%0", &addreg1);
2752
2753  /* Do high-numbered word.  */
2754  if (highonly)
2755    output_asm_insn ("ldil L'%1,%0", latehalf);
2756  else
2757    output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2758
2759  /* Undo the adds we just did.  */
2760  if (addreg0)
2761    output_asm_insn ("ldo -4(%0),%0", &addreg0);
2762  if (addreg1)
2763    output_asm_insn ("ldo -4(%0),%0", &addreg1);
2764
2765  return "";
2766}
2767
2768const char *
2769pa_output_fp_move_double (rtx *operands)
2770{
2771  if (FP_REG_P (operands[0]))
2772    {
2773      if (FP_REG_P (operands[1])
2774	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2775	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2776      else
2777	output_asm_insn ("fldd%F1 %1,%0", operands);
2778    }
2779  else if (FP_REG_P (operands[1]))
2780    {
2781      output_asm_insn ("fstd%F0 %1,%0", operands);
2782    }
2783  else
2784    {
2785      rtx xoperands[2];
2786
2787      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2788
2789      /* This is a pain.  You have to be prepared to deal with an
2790	 arbitrary address here including pre/post increment/decrement.
2791
2792	 So avoid this in the MD.  */
2793      gcc_assert (GET_CODE (operands[0]) == REG);
2794
2795      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2796      xoperands[0] = operands[0];
2797      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2798    }
2799  return "";
2800}
2801
2802/* Return a REG that occurs in ADDR with coefficient 1.
2803   ADDR can be effectively incremented by incrementing REG.  */
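/* E.g., for an address of the form (plus (reg R) (const_int 4)) this
   returns R; the operand shape here is illustrative.  */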
2804
2805static rtx
2806find_addr_reg (rtx addr)
2807{
2808  while (GET_CODE (addr) == PLUS)
2809    {
2810      if (GET_CODE (XEXP (addr, 0)) == REG)
2811	addr = XEXP (addr, 0);
2812      else if (GET_CODE (XEXP (addr, 1)) == REG)
2813	addr = XEXP (addr, 1);
2814      else if (CONSTANT_P (XEXP (addr, 0)))
2815	addr = XEXP (addr, 1);
2816      else if (CONSTANT_P (XEXP (addr, 1)))
2817	addr = XEXP (addr, 0);
2818      else
2819	gcc_unreachable ();
2820    }
2821  gcc_assert (GET_CODE (addr) == REG);
2822  return addr;
2823}
2824
2825/* Emit code to perform a block move.
2826
2827   OPERANDS[0] is the destination pointer as a REG, clobbered.
2828   OPERANDS[1] is the source pointer as a REG, clobbered.
2829   OPERANDS[2] is a register for temporary storage.
2830   OPERANDS[3] is a register for temporary storage.
2831   OPERANDS[4] is the size as a CONST_INT.
2832   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2833   OPERANDS[6] is another temporary register.  */
2834
2835const char *
2836pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2837{
2838  int align = INTVAL (operands[5]);
2839  unsigned long n_bytes = INTVAL (operands[4]);
2840
2841  /* We can't move more than a word at a time because the PA
2842     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
2843  if (align > (TARGET_64BIT ? 8 : 4))
2844    align = (TARGET_64BIT ? 8 : 4);
2845
2846  /* Note that we know each loop below will execute at least twice
2847     (else we would have open-coded the copy).  */
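  /* E.g. (illustrative sizes), with align 4 and n_bytes 16 the counter
     below is preloaded with 8 and the copy loop body executes exactly
     twice, moving eight bytes per iteration.  */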
2848  switch (align)
2849    {
2850      case 8:
2851	/* Pre-adjust the loop counter.  */
2852	operands[4] = GEN_INT (n_bytes - 16);
2853	output_asm_insn ("ldi %4,%2", operands);
2854
2855	/* Copying loop.  */
2856	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2857	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2858	output_asm_insn ("std,ma %3,8(%0)", operands);
2859	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2860	output_asm_insn ("std,ma %6,8(%0)", operands);
2861
2862	/* Handle the residual.  There could be up to 15 bytes of
2863	   residual to copy!  */
2864	if (n_bytes % 16 != 0)
2865	  {
2866	    operands[4] = GEN_INT (n_bytes % 8);
2867	    if (n_bytes % 16 >= 8)
2868	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2869	    if (n_bytes % 8 != 0)
2870	      output_asm_insn ("ldd 0(%1),%6", operands);
2871	    if (n_bytes % 16 >= 8)
2872	      output_asm_insn ("std,ma %3,8(%0)", operands);
2873	    if (n_bytes % 8 != 0)
2874	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2875	  }
2876	return "";
2877
2878      case 4:
2879	/* Pre-adjust the loop counter.  */
2880	operands[4] = GEN_INT (n_bytes - 8);
2881	output_asm_insn ("ldi %4,%2", operands);
2882
2883	/* Copying loop.  */
2884	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2885	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2886	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2887	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2888	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2889
2890	/* Handle the residual.  There could be up to 7 bytes of
2891	   residual to copy!  */
2892	if (n_bytes % 8 != 0)
2893	  {
2894	    operands[4] = GEN_INT (n_bytes % 4);
2895	    if (n_bytes % 8 >= 4)
2896	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2897	    if (n_bytes % 4 != 0)
2898	      output_asm_insn ("ldw 0(%1),%6", operands);
2899	    if (n_bytes % 8 >= 4)
2900	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2901	    if (n_bytes % 4 != 0)
2902	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2903	  }
2904	return "";
2905
2906      case 2:
2907	/* Pre-adjust the loop counter.  */
2908	operands[4] = GEN_INT (n_bytes - 4);
2909	output_asm_insn ("ldi %4,%2", operands);
2910
2911	/* Copying loop.  */
2912	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2913	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2914	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2915	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2916	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2917
2918	/* Handle the residual.  */
2919	if (n_bytes % 4 != 0)
2920	  {
2921	    if (n_bytes % 4 >= 2)
2922	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2923	    if (n_bytes % 2 != 0)
2924	      output_asm_insn ("ldb 0(%1),%6", operands);
2925	    if (n_bytes % 4 >= 2)
2926	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2927	    if (n_bytes % 2 != 0)
2928	      output_asm_insn ("stb %6,0(%0)", operands);
2929	  }
2930	return "";
2931
2932      case 1:
2933	/* Pre-adjust the loop counter.  */
2934	operands[4] = GEN_INT (n_bytes - 2);
2935	output_asm_insn ("ldi %4,%2", operands);
2936
2937	/* Copying loop.  */
2938	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2939	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2940	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2941	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2942	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2943
2944	/* Handle the residual.  */
2945	if (n_bytes % 2 != 0)
2946	  {
2947	    output_asm_insn ("ldb 0(%1),%3", operands);
2948	    output_asm_insn ("stb %3,0(%0)", operands);
2949	  }
2950	return "";
2951
2952      default:
2953	gcc_unreachable ();
2954    }
2955}
2956
2957/* Count the number of insns necessary to handle this block move.
2958
2959   Basic structure is the same as pa_output_block_move, except that we
2960   count insns rather than emit them.  */
2961
2962static int
2963compute_movmem_length (rtx_insn *insn)
2964{
2965  rtx pat = PATTERN (insn);
2966  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2967  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2968  unsigned int n_insns = 0;
2969
2970  /* We can't move more than a word at a time because the PA
2971     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
2972  if (align > (TARGET_64BIT ? 8 : 4))
2973    align = (TARGET_64BIT ? 8 : 4);
2974
2975  /* The basic copying loop.  */
2976  n_insns = 6;
2977
2978  /* Residuals.  */
2979  if (n_bytes % (2 * align) != 0)
2980    {
2981      if ((n_bytes % (2 * align)) >= align)
2982	n_insns += 2;
2983
2984      if ((n_bytes % align) != 0)
2985	n_insns += 2;
2986    }
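  /* E.g. (illustrative sizes), align 4 and n_bytes 11 gives the 6-insn
     loop plus 2 residual insns: 8 insns, or 32 bytes of code.  */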
2987
2988  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2989  return n_insns * 4;
2990}
2991
2992/* Emit code to perform a block clear.
2993
2994   OPERANDS[0] is the destination pointer as a REG, clobbered.
2995   OPERANDS[1] is a register for temporary storage.
2996   OPERANDS[2] is the size as a CONST_INT.
2997   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2998
2999const char *
3000pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3001{
3002  int align = INTVAL (operands[3]);
3003  unsigned long n_bytes = INTVAL (operands[2]);
3004
3005  /* We can't clear more than a word at a time because the PA
3006     has no integer move insns wider than a word.  */
3007  if (align > (TARGET_64BIT ? 8 : 4))
3008    align = (TARGET_64BIT ? 8 : 4);
3009
3010  /* Note that we know each loop below will execute at least twice
3011     (else we would have open-coded the clear).  */
3012  switch (align)
3013    {
3014      case 8:
3015	/* Pre-adjust the loop counter.  */
3016	operands[2] = GEN_INT (n_bytes - 16);
3017	output_asm_insn ("ldi %2,%1", operands);
3018
3019	/* Loop.  */
3020	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3021	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3022	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3023
3024	/* Handle the residual.  There could be up to 15 bytes of
3025	   residual to clear!  */
3026	if (n_bytes % 16 != 0)
3027	  {
3028	    operands[2] = GEN_INT (n_bytes % 8);
3029	    if (n_bytes % 16 >= 8)
3030	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3031	    if (n_bytes % 8 != 0)
3032	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3033	  }
3034	return "";
3035
3036      case 4:
3037	/* Pre-adjust the loop counter.  */
3038	operands[2] = GEN_INT (n_bytes - 8);
3039	output_asm_insn ("ldi %2,%1", operands);
3040
3041	/* Loop.  */
3042	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3043	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3044	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3045
3046	/* Handle the residual.  There could be up to 7 bytes of
3047	   residual to clear!  */
3048	if (n_bytes % 8 != 0)
3049	  {
3050	    operands[2] = GEN_INT (n_bytes % 4);
3051	    if (n_bytes % 8 >= 4)
3052	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3053	    if (n_bytes % 4 != 0)
3054	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3055	  }
3056	return "";
3057
3058      case 2:
3059	/* Pre-adjust the loop counter.  */
3060	operands[2] = GEN_INT (n_bytes - 4);
3061	output_asm_insn ("ldi %2,%1", operands);
3062
3063	/* Loop.  */
3064	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3065	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3066	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3067
3068	/* Handle the residual.  */
3069	if (n_bytes % 4 != 0)
3070	  {
3071	    if (n_bytes % 4 >= 2)
3072	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3073	    if (n_bytes % 2 != 0)
3074	      output_asm_insn ("stb %%r0,0(%0)", operands);
3075	  }
3076	return "";
3077
3078      case 1:
3079	/* Pre-adjust the loop counter.  */
3080	operands[2] = GEN_INT (n_bytes - 2);
3081	output_asm_insn ("ldi %2,%1", operands);
3082
3083	/* Loop.  */
3084	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3085	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3086	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3087
3088	/* Handle the residual.  */
3089	if (n_bytes % 2 != 0)
3090	  output_asm_insn ("stb %%r0,0(%0)", operands);
3091
3092	return "";
3093
3094      default:
3095	gcc_unreachable ();
3096    }
3097}
3098
3099/* Count the number of insns necessary to handle this block clear.
3100
3101   Basic structure is the same as pa_output_block_clear, except that we
3102   count insns rather than emit them.  */
3103
3104static int
3105compute_clrmem_length (rtx_insn *insn)
3106{
3107  rtx pat = PATTERN (insn);
3108  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3109  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3110  unsigned int n_insns = 0;
3111
3112  /* We can't clear more than a word at a time because the PA
3113     has no integer move insns wider than a word.  */
3114  if (align > (TARGET_64BIT ? 8 : 4))
3115    align = (TARGET_64BIT ? 8 : 4);
3116
3117  /* The basic loop.  */
3118  n_insns = 4;
3119
3120  /* Residuals.  */
3121  if (n_bytes % (2 * align) != 0)
3122    {
3123      if ((n_bytes % (2 * align)) >= align)
3124	n_insns++;
3125
3126      if ((n_bytes % align) != 0)
3127	n_insns++;
3128    }
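  /* E.g. (illustrative sizes), align 4 and n_bytes 11 gives the 4-insn
     loop plus 1 residual insn: 5 insns, or 20 bytes of code.  */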
3129
3130  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3131  return n_insns * 4;
3132}
3133
3134
3135const char *
3136pa_output_and (rtx *operands)
3137{
3138  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3139    {
3140      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3141      int ls0, ls1, ms0, p, len;
3142
3143      for (ls0 = 0; ls0 < 32; ls0++)
3144	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3145	  break;
3146
3147      for (ls1 = ls0; ls1 < 32; ls1++)
3148	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3149	  break;
3150
3151      for (ms0 = ls1; ms0 < 32; ms0++)
3152	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3153	  break;
3154
3155      gcc_assert (ms0 == 32);
3156
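      /* Illustrative masks: 0x000000ff gives ls0 = 8, ls1 = 32 and is
	 handled below with an extract (extru %1,31,8,%0), while
	 0xfffffc00 gives ls0 = 0, ls1 = 10 and is handled with a
	 deposit (depwi 0,31,10,%0) clearing the low ten bits.  */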
3157      if (ls1 == 32)
3158	{
3159	  len = ls0;
3160
3161	  gcc_assert (len);
3162
3163	  operands[2] = GEN_INT (len);
3164	  return "{extru|extrw,u} %1,31,%2,%0";
3165	}
3166      else
3167	{
3168	  /* We could use this `depi' for the case above as well, but `depi'
3169	     requires one more register file access than an `extru'.  */
3170
3171	  p = 31 - ls0;
3172	  len = ls1 - ls0;
3173
3174	  operands[2] = GEN_INT (p);
3175	  operands[3] = GEN_INT (len);
3176	  return "{depi|depwi} 0,%2,%3,%0";
3177	}
3178    }
3179  else
3180    return "and %1,%2,%0";
3181}
3182
3183/* Return a string to perform a bitwise-and of operands[1] with operands[2],
3184   storing the result in operands[0].  */
3185const char *
3186pa_output_64bit_and (rtx *operands)
3187{
3188  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3189    {
3190      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3191      int ls0, ls1, ms0, p, len;
3192
3193      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3194	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3195	  break;
3196
3197      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3198	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3199	  break;
3200
3201      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3202	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3203	  break;
3204
3205      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3206
3207      if (ls1 == HOST_BITS_PER_WIDE_INT)
3208	{
3209	  len = ls0;
3210
3211	  gcc_assert (len);
3212
3213	  operands[2] = GEN_INT (len);
3214	  return "extrd,u %1,63,%2,%0";
3215	}
3216      else
3217	{
3218	  /* We could use this `depdi' for the case above as well, but `depdi'
3219	     requires one more register file access than an `extrd,u'.  */
3220
3221	  p = 63 - ls0;
3222	  len = ls1 - ls0;
3223
3224	  operands[2] = GEN_INT (p);
3225	  operands[3] = GEN_INT (len);
3226	  return "depdi 0,%2,%3,%0";
3227	}
3228    }
3229  else
3230    return "and %1,%2,%0";
3231}
3232
3233const char *
3234pa_output_ior (rtx *operands)
3235{
3236  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3237  int bs0, bs1, p, len;
3238
3239  if (INTVAL (operands[2]) == 0)
3240    return "copy %1,%0";
3241
3242  for (bs0 = 0; bs0 < 32; bs0++)
3243    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3244      break;
3245
3246  for (bs1 = bs0; bs1 < 32; bs1++)
3247    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3248      break;
3249
3250  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3251
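  /* E.g. (illustrative), mask 0x0000ff00 gives bs0 = 8 and bs1 = 16,
     producing depwi -1,23,8,%0, which deposits ones exactly over the
     0x0000ff00 field.  */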
3252  p = 31 - bs0;
3253  len = bs1 - bs0;
3254
3255  operands[2] = GEN_INT (p);
3256  operands[3] = GEN_INT (len);
3257  return "{depi|depwi} -1,%2,%3,%0";
3258}
3259
3260/* Return a string to perform a bitwise inclusive-or of operands[1] with
3261   operands[2], storing the result in operands[0].  */
3262const char *
3263pa_output_64bit_ior (rtx *operands)
3264{
3265  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3266  int bs0, bs1, p, len;
3267
3268  if (INTVAL (operands[2]) == 0)
3269    return "copy %1,%0";
3270
3271  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3272    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3273      break;
3274
3275  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3276    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3277      break;
3278
3279  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3280	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3281
3282  p = 63 - bs0;
3283  len = bs1 - bs0;
3284
3285  operands[2] = GEN_INT (p);
3286  operands[3] = GEN_INT (len);
3287  return "depdi -1,%2,%3,%0";
3288}
3289
3290/* Target hook for assembling integer objects.  This code handles
3291   aligned SI and DI integers specially since function references
3292   must be preceded by P%.  */
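/* For example (an illustrative symbol), an aligned word-sized reference
   to function foo is emitted as "\t.word\tP%foo".  */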
3293
3294static bool
3295pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3296{
3297  if (size == UNITS_PER_WORD
3298      && aligned_p
3299      && function_label_operand (x, VOIDmode))
3300    {
3301      fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3302
3303      /* We don't want an OPD when generating fast indirect calls.  */
3304      if (!TARGET_FAST_INDIRECT_CALLS)
3305	fputs ("P%", asm_out_file);
3306
3307      output_addr_const (asm_out_file, x);
3308      fputc ('\n', asm_out_file);
3309      return true;
3310    }
3311  return default_assemble_integer (x, size, aligned_p);
3312}
3313
3314/* Output an ascii string.  */
3315void
3316pa_output_ascii (FILE *file, const char *p, int size)
3317{
3318  int i;
3319  int chars_output;
3320  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3321
3322  /* The HP assembler can only take strings of 256 characters at one
3323     time.  This is a limitation on input line length, *not* the
3324     length of the string.  Sigh.  Even worse, it seems that the
3325     restriction is in number of input characters (see \xnn &
3326     \whatever).  So we have to do this very carefully.  */
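  /* For example, the single byte 0x01 is emitted as the four input
     characters \x01, which is why the code below counts emitted
     characters rather than source bytes.  */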
3327
3328  fputs ("\t.STRING \"", file);
3329
3330  chars_output = 0;
3331  for (i = 0; i < size; i += 4)
3332    {
3333      int co = 0;
3334      int io = 0;
3335      for (io = 0, co = 0; io < MIN (4, size - i); io++)
3336	{
3337	  register unsigned int c = (unsigned char) p[i + io];
3338
3339	  if (c == '\"' || c == '\\')
3340	    partial_output[co++] = '\\';
3341	  if (c >= ' ' && c < 0177)
3342	    partial_output[co++] = c;
3343	  else
3344	    {
3345	      unsigned int hexd;
3346	      partial_output[co++] = '\\';
3347	      partial_output[co++] = 'x';
3348	      hexd = c / 16 + '0';
3349	      if (hexd > '9')
3350		hexd -= '9' - 'a' + 1;
3351	      partial_output[co++] = hexd;
3352	      hexd = c % 16 + '0';
3353	      if (hexd > '9')
3354		hexd -= '9' - 'a' + 1;
3355	      partial_output[co++] = hexd;
3356	    }
3357	}
3358      if (chars_output + co > 243)
3359	{
3360	  fputs ("\"\n\t.STRING \"", file);
3361	  chars_output = 0;
3362	}
3363      fwrite (partial_output, 1, (size_t) co, file);
3364      chars_output += co;
3365      co = 0;
3366    }
3367  fputs ("\"\n", file);
3368}
3369
3370/* Try to rewrite floating point comparisons & branches to avoid
3371   useless add,tr insns.
3372
3373   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3374   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3375   first attempt to remove useless add,tr insns.  It is zero
3376   for the second pass as reorg sometimes leaves bogus REG_DEAD
3377   notes lying around.
3378
3379   When CHECK_NOTES is zero we can only eliminate add,tr insns
3380   when there's a 1:1 correspondence between fcmp and ftest/fbranch
3381   instructions.  */
3382static void
3383remove_useless_addtr_insns (int check_notes)
3384{
3385  rtx_insn *insn;
3387
3388  /* This is fairly cheap, so always run it when optimizing.  */
3389  if (optimize > 0)
3390    {
3391      int fcmp_count = 0;
3392      int fbranch_count = 0;
3393
3394      /* Walk all the insns in this function looking for fcmp & fbranch
3395	 instructions.  Keep track of how many of each we find.  */
3396      for (insn = get_insns (); insn; insn = next_insn (insn))
3397	{
3398	  rtx tmp;
3399
3400	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3401	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3402	    continue;
3403
3404	  tmp = PATTERN (insn);
3405
3406	  /* It must be a set.  */
3407	  if (GET_CODE (tmp) != SET)
3408	    continue;
3409
3410	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3411	  tmp = SET_DEST (tmp);
3412	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3413	    {
3414	      fcmp_count++;
3415	      continue;
3416	    }
3417
3418	  tmp = PATTERN (insn);
3419	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3420	  if (GET_CODE (tmp) == SET
3421	      && SET_DEST (tmp) == pc_rtx
3422	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3423	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3424	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3425	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3426	    {
3427	      fbranch_count++;
3428	      continue;
3429	    }
3430	}
3431
3432
3433      /* Find all floating point compare + branch insns.  If possible,
3434	 reverse the comparison & the branch to avoid add,tr insns.  */
3435      for (insn = get_insns (); insn; insn = next_insn (insn))
3436	{
3437	  rtx tmp;
3438	  rtx_insn *next;
3439
3440	  /* Ignore anything that isn't an INSN.  */
3441	  if (! NONJUMP_INSN_P (insn))
3442	    continue;
3443
3444	  tmp = PATTERN (insn);
3445
3446	  /* It must be a set.  */
3447	  if (GET_CODE (tmp) != SET)
3448	    continue;
3449
3450	  /* The destination must be CCFP, which is register zero.  */
3451	  tmp = SET_DEST (tmp);
3452	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3453	    continue;
3454
3455	  /* INSN should be a set of CCFP.
3456
3457	     See if the result of this insn is used in a reversed FP
3458	     conditional branch.  If so, reverse our condition and
3459	     the branch.  Doing so avoids useless add,tr insns.  */
3460	  next = next_insn (insn);
3461	  while (next)
3462	    {
3463	      /* Jumps, calls and labels stop our search.  */
3464	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3465		break;
3466
3467	      /* As does another fcmp insn.  */
3468	      if (NONJUMP_INSN_P (next)
3469		  && GET_CODE (PATTERN (next)) == SET
3470		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3471		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3472		break;
3473
3474	      next = next_insn (next);
3475	    }
3476
	  /* Is NEXT a branch?  */
3478	  if (next && JUMP_P (next))
3479	    {
3480	      rtx pattern = PATTERN (next);
3481
	      /* If it is a reversed fp conditional branch (e.g. it uses
		 add,tr) and CCFP dies, then reverse our condition and
		 the branch to avoid the add,tr.  */
3485	      if (GET_CODE (pattern) == SET
3486		  && SET_DEST (pattern) == pc_rtx
3487		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3488		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3489		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3490		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3491		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3492		  && (fcmp_count == fbranch_count
3493		      || (check_notes
3494			  && find_regno_note (next, REG_DEAD, 0))))
3495		{
3496		  /* Reverse the branch.  */
3497		  tmp = XEXP (SET_SRC (pattern), 1);
3498		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3499		  XEXP (SET_SRC (pattern), 2) = tmp;
3500		  INSN_CODE (next) = -1;
3501
3502		  /* Reverse our condition.  */
3503		  tmp = PATTERN (insn);
3504		  PUT_CODE (XEXP (tmp, 1),
3505			    (reverse_condition_maybe_unordered
3506			     (GET_CODE (XEXP (tmp, 1)))));
3507		}
3508	    }
3509	}
3510    }
}
3515
/* You may have trouble believing this, but this is the 32-bit HP-PA
   stack layout.  Wow.
3518
3519   Offset		Contents
3520
3521   Variable arguments	(optional; any number may be allocated)
3522
      SP-(4*(N+9))	arg word N
	   :		    :
      SP-56		arg word 5
      SP-52		arg word 4
3527
3528   Fixed arguments	(must be allocated; may remain unused)
3529
3530      SP-48		arg word 3
3531      SP-44		arg word 2
3532      SP-40		arg word 1
3533      SP-36		arg word 0
3534
3535   Frame Marker
3536
3537      SP-32		External Data Pointer (DP)
3538      SP-28		External sr4
3539      SP-24		External/stub RP (RP')
3540      SP-20		Current RP
3541      SP-16		Static Link
3542      SP-12		Clean up
3543      SP-8		Calling Stub RP (RP'')
3544      SP-4		Previous SP
3545
3546   Top of Frame
3547
3548      SP-0		Stack Pointer (points to next available address)
3549
3550*/
3551
3552/* This function saves registers as follows.  Registers marked with ' are
3553   this function's registers (as opposed to the previous function's).
3554   If a frame_pointer isn't needed, r4 is saved as a general register;
3555   the space for the frame pointer is still allocated, though, to keep
3556   things simple.
3557
3558
3559   Top of Frame
3560
3561       SP (FP')		Previous FP
3562       SP + 4		Alignment filler (sigh)
3563       SP + 8		Space for locals reserved here.
3564       .
3565       .
3566       .
       SP + n		All call-saved registers used.
3568       .
3569       .
3570       .
       SP + o		All call-saved fp registers used.
3572       .
3573       .
3574       .
3575       SP + p (SP')	points to next available address.
3576
3577*/
3578
/* Global variables set by pa_expand_prologue.  */
3580/* Size of frame.  Need to know this to emit return insns from
3581   leaf procedures.  */
3582static HOST_WIDE_INT actual_fsize, local_fsize;
3583static int save_fregs;
3584
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle the case where DISP > 8k by using the add_high_const patterns.

   Note that in the DISP > 8k case, we will leave the high part of the
   address in %r1.  There is code in pa_expand_{prologue,epilogue} that
   knows this.  */
3590
3591static void
3592store_reg (int reg, HOST_WIDE_INT disp, int base)
3593{
3594  rtx dest, src, basereg;
3595  rtx_insn *insn;
3596
3597  src = gen_rtx_REG (word_mode, reg);
3598  basereg = gen_rtx_REG (Pmode, base);
3599  if (VAL_14_BITS_P (disp))
3600    {
3601      dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3602      insn = emit_move_insn (dest, src);
3603    }
3604  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3605    {
3606      rtx delta = GEN_INT (disp);
3607      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3608
3609      emit_move_insn (tmpreg, delta);
3610      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3611      if (DO_FRAME_NOTES)
3612	{
3613	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3614			gen_rtx_SET (VOIDmode, tmpreg,
3615				     gen_rtx_PLUS (Pmode, basereg, delta)));
3616	  RTX_FRAME_RELATED_P (insn) = 1;
3617	}
3618      dest = gen_rtx_MEM (word_mode, tmpreg);
3619      insn = emit_move_insn (dest, src);
3620    }
3621  else
3622    {
3623      rtx delta = GEN_INT (disp);
3624      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3625      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3626
3627      emit_move_insn (tmpreg, high);
3628      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3629      insn = emit_move_insn (dest, src);
3630      if (DO_FRAME_NOTES)
3631	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3632		      gen_rtx_SET (VOIDmode,
3633				   gen_rtx_MEM (word_mode,
3634						gen_rtx_PLUS (word_mode,
3635							      basereg,
3636							      delta)),
3637				   src));
3638    }
3639
3640  if (DO_FRAME_NOTES)
3641    RTX_FRAME_RELATED_P (insn) = 1;
3642}
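
/* Illustrative 32-bit output (operands assumed): a displacement that
   fits in 14 bits produces a single "stw %r4,-64(%r30)", while a
   larger one first emits "addil L'disp,%r30" (leaving the high part
   of the address in %r1) and then stores with "stw %r4,R'disp(%r1)".  */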
3643
3644/* Emit RTL to store REG at the memory location specified by BASE and then
3645   add MOD to BASE.  MOD must be <= 8k.  */
3646
3647static void
3648store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3649{
3650  rtx basereg, srcreg, delta;
3651  rtx_insn *insn;
3652
3653  gcc_assert (VAL_14_BITS_P (mod));
3654
3655  basereg = gen_rtx_REG (Pmode, base);
3656  srcreg = gen_rtx_REG (word_mode, reg);
3657  delta = GEN_INT (mod);
3658
3659  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3660  if (DO_FRAME_NOTES)
3661    {
3662      RTX_FRAME_RELATED_P (insn) = 1;
3663
3664      /* RTX_FRAME_RELATED_P must be set on each frame related set
3665	 in a parallel with more than one element.  */
3666      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3667      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3668    }
3669}
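
/* On 32-bit targets this typically assembles to a single
   "stwm %r3,64(%r30)" (operands assumed), which stores %r3 at *sp
   and post-increments the stack pointer by 64 in one instruction.  */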
3670
3671/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3672   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3673   whether to add a frame note or not.
3674
3675   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3677
3678static void
3679set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3680{
3681  rtx_insn *insn;
3682
3683  if (VAL_14_BITS_P (disp))
3684    {
3685      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3686			     plus_constant (Pmode,
3687					    gen_rtx_REG (Pmode, base), disp));
3688    }
3689  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3690    {
3691      rtx basereg = gen_rtx_REG (Pmode, base);
3692      rtx delta = GEN_INT (disp);
3693      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3694
3695      emit_move_insn (tmpreg, delta);
3696      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3697			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3698      if (DO_FRAME_NOTES)
3699	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3700		      gen_rtx_SET (VOIDmode, tmpreg,
3701				   gen_rtx_PLUS (Pmode, basereg, delta)));
3702    }
3703  else
3704    {
3705      rtx basereg = gen_rtx_REG (Pmode, base);
3706      rtx delta = GEN_INT (disp);
3707      rtx tmpreg = gen_rtx_REG (Pmode, 1);
3708
3709      emit_move_insn (tmpreg,
3710		      gen_rtx_PLUS (Pmode, basereg,
3711				    gen_rtx_HIGH (Pmode, delta)));
3712      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3713			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3714    }
3715
3716  if (DO_FRAME_NOTES && note)
3717    RTX_FRAME_RELATED_P (insn) = 1;
3718}
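
/* Illustrative 32-bit output: a 14-bit displacement becomes a single
   "ldo disp(base),reg"; otherwise "addil L'disp,base" leaves the high
   part of the sum in %r1 and "ldo R'disp(%r1),reg" completes it.  */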
3719
3720HOST_WIDE_INT
3721pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3722{
3723  int freg_saved = 0;
3724  int i, j;
3725
3726  /* The code in pa_expand_prologue and pa_expand_epilogue must
3727     be consistent with the rounding and size calculation done here.
3728     Change them at the same time.  */
3729
3730  /* We do our own stack alignment.  First, round the size of the
3731     stack locals up to a word boundary.  */
3732  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3733
3734  /* Space for previous frame pointer + filler.  If any frame is
3735     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3736     waste some space here for the sake of HP compatibility.  The
3737     first slot is only used when the frame pointer is needed.  */
3738  if (size || frame_pointer_needed)
3739    size += STARTING_FRAME_OFFSET;
3740
3741  /* If the current function calls __builtin_eh_return, then we need
3742     to allocate stack space for registers that will hold data for
3743     the exception handler.  */
3744  if (DO_FRAME_NOTES && crtl->calls_eh_return)
3745    {
3746      unsigned int i;
3747
3748      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3749	continue;
3750      size += i * UNITS_PER_WORD;
3751    }
3752
3753  /* Account for space used by the callee general register saves.  */
3754  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3755    if (df_regs_ever_live_p (i))
3756      size += UNITS_PER_WORD;
3757
3758  /* Account for space used by the callee floating point register saves.  */
3759  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3760    if (df_regs_ever_live_p (i)
3761	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3762      {
3763	freg_saved = 1;
3764
3765	/* We always save both halves of the FP register, so always
3766	   increment the frame size by 8 bytes.  */
3767	size += 8;
3768      }
3769
3770  /* If any of the floating registers are saved, account for the
3771     alignment needed for the floating point register save block.  */
3772  if (freg_saved)
3773    {
3774      size = (size + 7) & ~7;
3775      if (fregs_live)
3776	*fregs_live = 1;
3777    }
3778
3779  /* The various ABIs include space for the outgoing parameters in the
3780     size of the current function's stack frame.  We don't need to align
3781     for the outgoing arguments as their alignment is set by the final
3782     rounding for the frame as a whole.  */
3783  size += crtl->outgoing_args_size;
3784
3785  /* Allocate space for the fixed frame marker.  This space must be
3786     allocated for any function that makes calls or allocates
3787     stack space.  */
3788  if (!crtl->is_leaf || size)
3789    size += TARGET_64BIT ? 48 : 32;
3790
3791  /* Finally, round to the preferred stack boundary.  */
3792  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3793	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3794}
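
/* A worked 32-bit example (assuming a STARTING_FRAME_OFFSET of 8 and
   a 64-byte preferred stack boundary): 40 bytes of locals with a
   frame pointer give 40 + 8 = 48; two callee GR saves bring that to
   56; 16 bytes of outgoing arguments make 72; the 32-byte frame
   marker for a non-leaf function makes 104; and the final rounding
   yields a 128-byte frame.  */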
3795
3796/* Generate the assembly code for function entry.  FILE is a stdio
3797   stream to output the code to.  SIZE is an int: how many units of
3798   temporary storage to allocate.
3799
3800   Refer to the array `regs_ever_live' to determine which registers to
3801   save; `regs_ever_live[I]' is nonzero if register number I is ever
3802   used in the function.  This function is responsible for knowing
3803   which registers should not be saved even if used.  */
3804
3805/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3806   of memory.  If any fpu reg is used in the function, we allocate
3807   such a block here, at the bottom of the frame, just in case it's needed.
3808
3809   If this function is a leaf procedure, then we may choose not
3810   to do a "save" insn.  The decision about whether or not
3811   to do this is made in regclass.c.  */
3812
3813static void
3814pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3815{
3816  /* The function's label and associated .PROC must never be
3817     separated and must be output *after* any profiling declarations
3818     to avoid changing spaces/subspaces within a procedure.  */
3819  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3820  fputs ("\t.PROC\n", file);
3821
3822  /* pa_expand_prologue does the dirty work now.  We just need
3823     to output the assembler directives which denote the start
3824     of a function.  */
3825  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3826  if (crtl->is_leaf)
3827    fputs (",NO_CALLS", file);
3828  else
3829    fputs (",CALLS", file);
3830  if (rp_saved)
3831    fputs (",SAVE_RP", file);
3832
3833  /* The SAVE_SP flag is used to indicate that register %r3 is stored
3834     at the beginning of the frame and that it is used as the frame
3835     pointer for the frame.  We do this because our current frame
3836     layout doesn't conform to that specified in the HP runtime
3837     documentation and we need a way to indicate to programs such as
3838     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3839     isn't used by HP compilers but is supported by the assembler.
3840     However, SAVE_SP is supposed to indicate that the previous stack
3841     pointer has been saved in the frame marker.  */
3842  if (frame_pointer_needed)
3843    fputs (",SAVE_SP", file);
3844
3845  /* Pass on information about the number of callee register saves
3846     performed in the prologue.
3847
3848     The compiler is supposed to pass the highest register number
3849     saved, the assembler then has to adjust that number before
3850     entering it into the unwind descriptor (to account for any
3851     caller saved registers with lower register numbers than the
3852     first callee saved register).  */
3853  if (gr_saved)
3854    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3855
3856  if (fr_saved)
3857    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3858
3859  fputs ("\n\t.ENTRY\n", file);
3860
3861  remove_useless_addtr_insns (0);
3862}
3863
3864void
3865pa_expand_prologue (void)
3866{
3867  int merge_sp_adjust_with_store = 0;
3868  HOST_WIDE_INT size = get_frame_size ();
3869  HOST_WIDE_INT offset;
3870  int i;
3871  rtx tmpreg;
3872  rtx_insn *insn;
3873
3874  gr_saved = 0;
3875  fr_saved = 0;
3876  save_fregs = 0;
3877
3878  /* Compute total size for frame pointer, filler, locals and rounding to
3879     the next word boundary.  Similar code appears in pa_compute_frame_size
3880     and must be changed in tandem with this code.  */
3881  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3882  if (local_fsize || frame_pointer_needed)
3883    local_fsize += STARTING_FRAME_OFFSET;
3884
3885  actual_fsize = pa_compute_frame_size (size, &save_fregs);
3886  if (flag_stack_usage_info)
3887    current_function_static_stack_size = actual_fsize;
3888
3889  /* Compute a few things we will use often.  */
3890  tmpreg = gen_rtx_REG (word_mode, 1);
3891
3892  /* Save RP first.  The calling conventions manual states RP will
3893     always be stored into the caller's frame at sp - 20 or sp - 16
3894     depending on which ABI is in use.  */
3895  if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3896    {
3897      store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3898      rp_saved = true;
3899    }
3900  else
3901    rp_saved = false;
3902
3903  /* Allocate the local frame and set up the frame pointer if needed.  */
3904  if (actual_fsize != 0)
3905    {
3906      if (frame_pointer_needed)
3907	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  There are two versions: the
	     first handles small (< 8k) frames, the second large (>= 8k)
	     frames.  */
3914	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3915	  if (DO_FRAME_NOTES)
3916	    RTX_FRAME_RELATED_P (insn) = 1;
3917
3918	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3919	  if (DO_FRAME_NOTES)
3920	    RTX_FRAME_RELATED_P (insn) = 1;
3921
3922	  if (VAL_14_BITS_P (actual_fsize))
3923	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3924	  else
3925	    {
3926	      /* It is incorrect to store the saved frame pointer at *sp,
3927		 then increment sp (writes beyond the current stack boundary).
3928
3929		 So instead use stwm to store at *sp and post-increment the
3930		 stack pointer as an atomic operation.  Then increment sp to
3931		 finish allocating the new frame.  */
3932	      HOST_WIDE_INT adjust1 = 8192 - 64;
3933	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3934
3935	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3936	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3937			      adjust2, 1);
3938	    }
3939
3940	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3941	     we need to store the previous stack pointer (frame pointer)
3942	     into the frame marker on targets that use the HP unwind
3943	     library.  This allows the HP unwind library to be used to
3944	     unwind GCC frames.  However, we are not fully compatible
3945	     with the HP library because our frame layout differs from
3946	     that specified in the HP runtime specification.
3947
3948	     We don't want a frame note on this instruction as the frame
3949	     marker moves during dynamic stack allocation.
3950
3951	     This instruction also serves as a blockage to prevent
3952	     register spills from being scheduled before the stack
3953	     pointer is raised.  This is necessary as we store
3954	     registers using the frame pointer as a base register,
3955	     and the frame pointer is set before sp is raised.  */
3956	  if (TARGET_HPUX_UNWIND_LIBRARY)
3957	    {
3958	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3959				       GEN_INT (TARGET_64BIT ? -8 : -4));
3960
3961	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3962			      hard_frame_pointer_rtx);
3963	    }
3964	  else
3965	    emit_insn (gen_blockage ());
3966	}
      /* No frame pointer needed.  */
3968      else
3969	{
	  /* In some cases we can perform the first callee register save
	     and allocate the stack frame at the same time.  If so, just
	     make a note of it and defer allocating the frame until we
	     save the callee registers.  */
3974	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3975	    merge_sp_adjust_with_store = 1;
	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
3978	  else
3979	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3980			    actual_fsize, 1);
3981	}
3982    }
3983
3984  /* Normal register save.
3985
3986     Do not save the frame pointer in the frame_pointer_needed case.  It
3987     was done earlier.  */
3988  if (frame_pointer_needed)
3989    {
3990      offset = local_fsize;
3991
3992      /* Saving the EH return data registers in the frame is the simplest
3993	 way to get the frame unwind information emitted.  We put them
3994	 just before the general registers.  */
3995      if (DO_FRAME_NOTES && crtl->calls_eh_return)
3996	{
3997	  unsigned int i, regno;
3998
3999	  for (i = 0; ; ++i)
4000	    {
4001	      regno = EH_RETURN_DATA_REGNO (i);
4002	      if (regno == INVALID_REGNUM)
4003		break;
4004
4005	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4006	      offset += UNITS_PER_WORD;
4007	    }
4008	}
4009
4010      for (i = 18; i >= 4; i--)
4011	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4012	  {
4013	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4014	    offset += UNITS_PER_WORD;
4015	    gr_saved++;
4016	  }
4017      /* Account for %r3 which is saved in a special place.  */
4018      gr_saved++;
4019    }
4020  /* No frame pointer needed.  */
4021  else
4022    {
4023      offset = local_fsize - actual_fsize;
4024
4025      /* Saving the EH return data registers in the frame is the simplest
4026         way to get the frame unwind information emitted.  */
4027      if (DO_FRAME_NOTES && crtl->calls_eh_return)
4028	{
4029	  unsigned int i, regno;
4030
4031	  for (i = 0; ; ++i)
4032	    {
4033	      regno = EH_RETURN_DATA_REGNO (i);
4034	      if (regno == INVALID_REGNUM)
4035		break;
4036
4037	      /* If merge_sp_adjust_with_store is nonzero, then we can
4038		 optimize the first save.  */
4039	      if (merge_sp_adjust_with_store)
4040		{
4041		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4042		  merge_sp_adjust_with_store = 0;
4043		}
4044	      else
4045		store_reg (regno, offset, STACK_POINTER_REGNUM);
4046	      offset += UNITS_PER_WORD;
4047	    }
4048	}
4049
4050      for (i = 18; i >= 3; i--)
4051      	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4052	  {
4053	    /* If merge_sp_adjust_with_store is nonzero, then we can
4054	       optimize the first GR save.  */
4055	    if (merge_sp_adjust_with_store)
4056	      {
4057		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4058		merge_sp_adjust_with_store = 0;
4059	      }
4060	    else
4061	      store_reg (i, offset, STACK_POINTER_REGNUM);
4062	    offset += UNITS_PER_WORD;
4063	    gr_saved++;
4064	  }
4065
4066      /* If we wanted to merge the SP adjustment with a GR save, but we never
4067	 did any GR saves, then just emit the adjustment here.  */
4068      if (merge_sp_adjust_with_store)
4069	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4070			actual_fsize, 1);
4071    }
4072
4073  /* The hppa calling conventions say that %r19, the pic offset
4074     register, is saved at sp - 32 (in this function's frame)
4075     when generating PIC code.  FIXME:  What is the correct thing
4076     to do for functions which make no calls and allocate no
4077     frame?  Do we need to allocate a frame, or can we just omit
4078     the save?   For now we'll just omit the save.
4079
4080     We don't want a note on this insn as the frame marker can
4081     move if there is a dynamic stack allocation.  */
4082  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4083    {
4084      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4085
      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }
4089
4090  /* Align pointer properly (doubleword boundary).  */
4091  offset = (offset + 7) & ~7;
4092
4093  /* Floating point register store.  */
4094  if (save_fregs)
4095    {
4096      rtx base;
4097
4098      /* First get the frame or stack pointer to the start of the FP register
4099	 save area.  */
4100      if (frame_pointer_needed)
4101	{
4102	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4103	  base = hard_frame_pointer_rtx;
4104	}
4105      else
4106	{
4107	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4108	  base = stack_pointer_rtx;
4109	}
4110
4111      /* Now actually save the FP registers.  */
4112      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4113	{
4114	  if (df_regs_ever_live_p (i)
4115	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4116	    {
4117	      rtx addr, reg;
4118	      rtx_insn *insn;
4119	      addr = gen_rtx_MEM (DFmode,
4120				  gen_rtx_POST_INC (word_mode, tmpreg));
4121	      reg = gen_rtx_REG (DFmode, i);
4122	      insn = emit_move_insn (addr, reg);
4123	      if (DO_FRAME_NOTES)
4124		{
4125		  RTX_FRAME_RELATED_P (insn) = 1;
4126		  if (TARGET_64BIT)
4127		    {
4128		      rtx mem = gen_rtx_MEM (DFmode,
4129					     plus_constant (Pmode, base,
4130							    offset));
4131		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4132				    gen_rtx_SET (VOIDmode, mem, reg));
4133		    }
4134		  else
4135		    {
4136		      rtx meml = gen_rtx_MEM (SFmode,
4137					      plus_constant (Pmode, base,
4138							     offset));
4139		      rtx memr = gen_rtx_MEM (SFmode,
4140					      plus_constant (Pmode, base,
4141							     offset + 4));
4142		      rtx regl = gen_rtx_REG (SFmode, i);
4143		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4144		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4145		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4146		      rtvec vec;
4147
4148		      RTX_FRAME_RELATED_P (setl) = 1;
4149		      RTX_FRAME_RELATED_P (setr) = 1;
4150		      vec = gen_rtvec (2, setl, setr);
4151		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4152				    gen_rtx_SEQUENCE (VOIDmode, vec));
4153		    }
4154		}
4155	      offset += GET_MODE_SIZE (DFmode);
4156	      fr_saved++;
4157	    }
4158	}
4159    }
4160}
4161
4162/* Emit RTL to load REG from the memory location specified by BASE+DISP.
4163   Handle case where DISP > 8k by using the add_high_const patterns.  */
4164
4165static void
4166load_reg (int reg, HOST_WIDE_INT disp, int base)
4167{
4168  rtx dest = gen_rtx_REG (word_mode, reg);
4169  rtx basereg = gen_rtx_REG (Pmode, base);
4170  rtx src;
4171
4172  if (VAL_14_BITS_P (disp))
4173    src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4174  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4175    {
4176      rtx delta = GEN_INT (disp);
4177      rtx tmpreg = gen_rtx_REG (Pmode, 1);
4178
4179      emit_move_insn (tmpreg, delta);
4180      if (TARGET_DISABLE_INDEXING)
4181	{
4182	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4183	  src = gen_rtx_MEM (word_mode, tmpreg);
4184	}
4185      else
4186	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4187    }
4188  else
4189    {
4190      rtx delta = GEN_INT (disp);
4191      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4192      rtx tmpreg = gen_rtx_REG (Pmode, 1);
4193
4194      emit_move_insn (tmpreg, high);
4195      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4196    }
4197
4198  emit_move_insn (dest, src);
4199}
4200
4201/* Update the total code bytes output to the text section.  */
4202
4203static void
4204update_total_code_bytes (unsigned int nbytes)
4205{
4206  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4207      && !IN_NAMED_SECTION_P (cfun->decl))
4208    {
4209      unsigned int old_total = total_code_bytes;
4210
4211      total_code_bytes += nbytes;
4212
4213      /* Be prepared to handle overflows.  */
4214      if (old_total > total_code_bytes)
4215        total_code_bytes = UINT_MAX;
4216    }
4217}
4218
4219/* This function generates the assembly code for function exit.
4220   Args are as for output_function_prologue ().
4221
4222   The function epilogue should not depend on the current stack
4223   pointer!  It should use the frame pointer only.  This is mandatory
4224   because of alloca; we also take advantage of it to omit stack
4225   adjustments before returning.  */
4226
4227static void
4228pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4229{
4230  rtx_insn *insn = get_last_insn ();
4231  bool extra_nop;
4232
4233  /* pa_expand_epilogue does the dirty work now.  We just need
4234     to output the assembler directives which denote the end
4235     of a function.
4236
4237     To make debuggers happy, emit a nop if the epilogue was completely
4238     eliminated due to a volatile call as the last insn in the
4239     current function.  That way the return address (in %r2) will
4240     always point to a valid instruction in the current function.  */
4241
4242  /* Get the last real insn.  */
4243  if (NOTE_P (insn))
4244    insn = prev_real_insn (insn);
4245
4246  /* If it is a sequence, then look inside.  */
4247  if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4249
4250  /* If insn is a CALL_INSN, then it must be a call to a volatile
4251     function (otherwise there would be epilogue insns).  */
4252  if (insn && CALL_P (insn))
4253    {
4254      fputs ("\tnop\n", file);
4255      extra_nop = true;
4256    }
4257  else
4258    extra_nop = false;
4259
4260  fputs ("\t.EXIT\n\t.PROCEND\n", file);
4261
4262  if (TARGET_SOM && TARGET_GAS)
4263    {
4264      /* We are done with this subspace except possibly for some additional
4265	 debug information.  Forget that we are in this subspace to ensure
4266	 that the next function is output in its own subspace.  */
4267      in_section = NULL;
4268      cfun->machine->in_nsubspa = 2;
4269    }
4270
4271  /* Thunks do their own insn accounting.  */
4272  if (cfun->is_thunk)
4273    return;
4274
4275  if (INSN_ADDRESSES_SET_P ())
4276    {
4277      last_address = extra_nop ? 4 : 0;
4278      insn = get_last_nonnote_insn ();
4279      if (insn)
4280	{
4281	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4282	  if (INSN_P (insn))
4283	    last_address += insn_default_length (insn);
4284	}
4285      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4286		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4287    }
4288  else
4289    last_address = UINT_MAX;
4290
4291  /* Finally, update the total number of code bytes output so far.  */
4292  update_total_code_bytes (last_address);
4293}
4294
4295void
4296pa_expand_epilogue (void)
4297{
4298  rtx tmpreg;
4299  HOST_WIDE_INT offset;
4300  HOST_WIDE_INT ret_off = 0;
4301  int i;
4302  int merge_sp_adjust_with_load = 0;
4303
4304  /* We will use this often.  */
4305  tmpreg = gen_rtx_REG (word_mode, 1);
4306
4307  /* Try to restore RP early to avoid load/use interlocks when
4308     RP gets used in the return (bv) instruction.  This appears to still
4309     be necessary even when we schedule the prologue and epilogue.  */
4310  if (rp_saved)
4311    {
4312      ret_off = TARGET_64BIT ? -16 : -20;
4313      if (frame_pointer_needed)
4314	{
4315	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4316	  ret_off = 0;
4317	}
4318      else
4319	{
4320	  /* No frame pointer, and stack is smaller than 8k.  */
4321	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4322	    {
4323	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4324	      ret_off = 0;
4325	    }
4326	}
4327    }
4328
4329  /* General register restores.  */
4330  if (frame_pointer_needed)
4331    {
4332      offset = local_fsize;
4333
4334      /* If the current function calls __builtin_eh_return, then we need
4335         to restore the saved EH data registers.  */
4336      if (DO_FRAME_NOTES && crtl->calls_eh_return)
4337	{
4338	  unsigned int i, regno;
4339
4340	  for (i = 0; ; ++i)
4341	    {
4342	      regno = EH_RETURN_DATA_REGNO (i);
4343	      if (regno == INVALID_REGNUM)
4344		break;
4345
4346	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4347	      offset += UNITS_PER_WORD;
4348	    }
4349	}
4350
4351      for (i = 18; i >= 4; i--)
4352	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4353	  {
4354	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4355	    offset += UNITS_PER_WORD;
4356	  }
4357    }
4358  else
4359    {
4360      offset = local_fsize - actual_fsize;
4361
4362      /* If the current function calls __builtin_eh_return, then we need
4363         to restore the saved EH data registers.  */
4364      if (DO_FRAME_NOTES && crtl->calls_eh_return)
4365	{
4366	  unsigned int i, regno;
4367
4368	  for (i = 0; ; ++i)
4369	    {
4370	      regno = EH_RETURN_DATA_REGNO (i);
4371	      if (regno == INVALID_REGNUM)
4372		break;
4373
4374	      /* Only for the first load.
4375	         merge_sp_adjust_with_load holds the register load
4376	         with which we will merge the sp adjustment.  */
4377	      if (merge_sp_adjust_with_load == 0
4378		  && local_fsize == 0
4379		  && VAL_14_BITS_P (-actual_fsize))
4380	        merge_sp_adjust_with_load = regno;
4381	      else
4382		load_reg (regno, offset, STACK_POINTER_REGNUM);
4383	      offset += UNITS_PER_WORD;
4384	    }
4385	}
4386
4387      for (i = 18; i >= 3; i--)
4388	{
4389	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4390	    {
4391	      /* Only for the first load.
4392	         merge_sp_adjust_with_load holds the register load
4393	         with which we will merge the sp adjustment.  */
4394	      if (merge_sp_adjust_with_load == 0
4395		  && local_fsize == 0
4396		  && VAL_14_BITS_P (-actual_fsize))
4397	        merge_sp_adjust_with_load = i;
4398	      else
4399		load_reg (i, offset, STACK_POINTER_REGNUM);
4400	      offset += UNITS_PER_WORD;
4401	    }
4402	}
4403    }
4404
4405  /* Align pointer properly (doubleword boundary).  */
4406  offset = (offset + 7) & ~7;
4407
4408  /* FP register restores.  */
4409  if (save_fregs)
4410    {
4411      /* Adjust the register to index off of.  */
4412      if (frame_pointer_needed)
4413	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4414      else
4415	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4416
4417      /* Actually do the restores now.  */
4418      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4419	if (df_regs_ever_live_p (i)
4420	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4421	  {
4422	    rtx src = gen_rtx_MEM (DFmode,
4423				   gen_rtx_POST_INC (word_mode, tmpreg));
4424	    rtx dest = gen_rtx_REG (DFmode, i);
4425	    emit_move_insn (dest, src);
4426	  }
4427    }
4428
4429  /* Emit a blockage insn here to keep these insns from being moved to
4430     an earlier spot in the epilogue, or into the main instruction stream.
4431
4432     This is necessary as we must not cut the stack back before all the
4433     restores are finished.  */
4434  emit_insn (gen_blockage ());
4435
4436  /* Reset stack pointer (and possibly frame pointer).  The stack
4437     pointer is initially set to fp + 64 to avoid a race condition.  */
4438  if (frame_pointer_needed)
4439    {
4440      rtx delta = GEN_INT (-64);
4441
4442      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4443      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4444			       stack_pointer_rtx, delta));
4445    }
4446  /* If we were deferring a callee register restore, do it now.  */
4447  else if (merge_sp_adjust_with_load)
4448    {
4449      rtx delta = GEN_INT (-actual_fsize);
4450      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4451
4452      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4453    }
4454  else if (actual_fsize != 0)
4455    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4456		    - actual_fsize, 0);
4457
4458  /* If we haven't restored %r2 yet (no frame pointer, and a stack
4459     frame greater than 8k), do so now.  */
4460  if (ret_off != 0)
4461    load_reg (2, ret_off, STACK_POINTER_REGNUM);
4462
4463  if (DO_FRAME_NOTES && crtl->calls_eh_return)
4464    {
4465      rtx sa = EH_RETURN_STACKADJ_RTX;
4466
4467      emit_insn (gen_blockage ());
4468      emit_insn (TARGET_64BIT
4469		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4470		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4471    }
4472}
4473
4474bool
4475pa_can_use_return_insn (void)
4476{
4477  if (!reload_completed)
4478    return false;
4479
4480  if (frame_pointer_needed)
4481    return false;
4482
4483  if (df_regs_ever_live_p (2))
4484    return false;
4485
4486  if (crtl->profile)
4487    return false;
4488
4489  return pa_compute_frame_size (get_frame_size (), 0) == 0;
4490}
4491
4492rtx
4493hppa_pic_save_rtx (void)
4494{
4495  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4496}
4497
4498#ifndef NO_DEFERRED_PROFILE_COUNTERS
4499#define NO_DEFERRED_PROFILE_COUNTERS 0
4500#endif
4501
4502
4503/* Vector of funcdef numbers.  */
4504static vec<int> funcdef_nos;
4505
4506/* Output deferred profile counters.  */
4507static void
4508output_deferred_profile_counters (void)
4509{
4510  unsigned int i;
4511  int align, n;
4512
4513  if (funcdef_nos.is_empty ())
4514   return;
4515
4516  switch_to_section (data_section);
4517  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4518  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4519
4520  for (i = 0; funcdef_nos.iterate (i, &n); i++)
4521    {
4522      targetm.asm_out.internal_label (asm_out_file, "LP", n);
4523      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4524    }
4525
4526  funcdef_nos.release ();
4527}
4528
4529void
4530hppa_profile_hook (int label_no)
4531{
4532  /* We use SImode for the address of the function in both 32 and
4533     64-bit code to avoid having to provide DImode versions of the
4534     lcla2 and load_offset_label_address insn patterns.  */
4535  rtx reg = gen_reg_rtx (SImode);
4536  rtx_code_label *label_rtx = gen_label_rtx ();
4537  rtx begin_label_rtx;
4538  rtx_insn *call_insn;
4539  char begin_label_name[16];
4540
4541  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4542			       label_no);
4543  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4544
4545  if (TARGET_64BIT)
4546    emit_move_insn (arg_pointer_rtx,
4547		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4548				  GEN_INT (64)));
4549
4550  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4551
4552  /* The address of the function is loaded into %r25 with an instruction-
4553     relative sequence that avoids the use of relocations.  The sequence
4554     is split so that the load_offset_label_address instruction can
4555     occupy the delay slot of the call to _mcount.  */
4556  if (TARGET_PA_20)
4557    emit_insn (gen_lcla2 (reg, label_rtx));
4558  else
4559    emit_insn (gen_lcla1 (reg, label_rtx));
4560
4561  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4562					    reg, begin_label_rtx, label_rtx));
4563
4564#if !NO_DEFERRED_PROFILE_COUNTERS
4565  {
4566    rtx count_label_rtx, addr, r24;
4567    char count_label_name[16];
4568
4569    funcdef_nos.safe_push (label_no);
4570    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4571    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4572
4573    addr = force_reg (Pmode, count_label_rtx);
4574    r24 = gen_rtx_REG (Pmode, 24);
4575    emit_move_insn (r24, addr);
4576
4577    call_insn =
4578      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4579					     gen_rtx_SYMBOL_REF (Pmode,
4580								 "_mcount")),
4581				GEN_INT (TARGET_64BIT ? 24 : 12)));
4582
4583    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4584  }
4585#else
4586
4587  call_insn =
4588    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4589					   gen_rtx_SYMBOL_REF (Pmode,
4590							       "_mcount")),
4591			      GEN_INT (TARGET_64BIT ? 16 : 8)));
4592
4593#endif
4594
4595  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4596  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4597
4598  /* Indicate the _mcount call cannot throw, nor will it execute a
4599     non-local goto.  */
4600  make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4601}
4602
4603/* Fetch the return address for the frame COUNT steps up from
4604   the current frame, after the prologue.  FRAMEADDR is the
4605   frame pointer of the COUNT frame.
4606
4607   We want to ignore any export stub remnants here.  To handle this,
4608   we examine the code at the return address, and if it is an export
4609   stub, we return a memory rtx for the stub return address stored
4610   at frame-24.
4611
4612   The value returned is used in two different ways:
4613
4614	1. To find a function's caller.
4615
4616	2. To change the return address for a function.
4617
4618   This function handles most instances of case 1; however, it will
4619   fail if there are two levels of stubs to execute on the return
4620   path.  The only way I believe that can happen is if the return value
4621   needs a parameter relocation, which never happens for C code.
4622
4623   This function handles most instances of case 2; however, it will
4624   fail if we did not originally have stub code on the return path
4625   but will need stub code on the new return path.  This can happen if
4626   the caller & callee are both in the main program, but the new
4627   return location is in a shared library.  */
4628
4629rtx
4630pa_return_addr_rtx (int count, rtx frameaddr)
4631{
4632  rtx label;
4633  rtx rp;
4634  rtx saved_rp;
4635  rtx ins;
4636
4637  /* The instruction stream at the return address of a PA1.X export stub is:
4638
4639	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4640	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4641	0x00011820 | stub+16:  mtsp r1,sr0
4642	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4643
4644     0xe0400002 must be specified as -532676606 so that it won't be
4645     rejected as an invalid immediate operand on 64-bit hosts.
4646
4647     The instruction stream at the return address of a PA2.0 export stub is:
4648
4649	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4650	0xe840d002 | stub+12:  bve,n (rp)
4651  */
4652
4653  HOST_WIDE_INT insns[4];
4654  int i, len;
4655
4656  if (count != 0)
4657    return NULL_RTX;
4658
4659  rp = get_hard_reg_initial_val (Pmode, 2);
4660
4661  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4662    return rp;
4663
4664  /* If there is no export stub then just use the value saved from
4665     the return pointer register.  */
4666
4667  saved_rp = gen_reg_rtx (Pmode);
4668  emit_move_insn (saved_rp, rp);
4669
4670  /* Get pointer to the instruction stream.  We have to mask out the
4671     privilege level from the two low order bits of the return address
4672     pointer here so that ins will point to the start of the first
4673     instruction that would have been executed if we returned.  */
4674  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4675  label = gen_label_rtx ();
4676
4677  if (TARGET_PA_20)
4678    {
4679      insns[0] = 0x4bc23fd1;
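      /* 0xe840d002 (bve,n (rp)) written as a signed 32-bit value.  */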
4680      insns[1] = -398405630;
4681      len = 2;
4682    }
4683  else
4684    {
4685      insns[0] = 0x4bc23fd1;
4686      insns[1] = 0x004010a1;
4687      insns[2] = 0x00011820;
4688      insns[3] = -532676606;
4689      len = 4;
4690    }
4691
  /* Check the instruction stream at the normal return address for the
     export stub.  If it is an export stub, then our return address is
     really in -24[frameaddr].  */
4695
4696  for (i = 0; i < len; i++)
4697    {
4698      rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4699      rtx op1 = GEN_INT (insns[i]);
4700      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4701    }
4702
4703  /* Here we know that our return address points to an export
4704     stub.  We don't want to return the address of the export stub,
4705     but rather the return address of the export stub.  That return
4706     address is stored at -24[frameaddr].  */
4707
4708  emit_move_insn (saved_rp,
4709		  gen_rtx_MEM (Pmode,
4710			       memory_address (Pmode,
4711					       plus_constant (Pmode, frameaddr,
4712							      -24))));
4713
4714  emit_label (label);
4715
4716  return saved_rp;
4717}
4718
4719void
4720pa_emit_bcond_fp (rtx operands[])
4721{
4722  enum rtx_code code = GET_CODE (operands[0]);
4723  rtx operand0 = operands[1];
4724  rtx operand1 = operands[2];
4725  rtx label = operands[3];
4726
4727  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4728		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4729
4730  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4731			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4732						     gen_rtx_fmt_ee (NE,
4733							      VOIDmode,
4734							      gen_rtx_REG (CCFPmode, 0),
4735							      const0_rtx),
4736						     gen_rtx_LABEL_REF (VOIDmode, label),
4737						     pc_rtx)));
}
4740
4741/* Adjust the cost of a scheduling dependency.  Return the new cost of
4742   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4743
4744static int
4745pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4746{
4747  enum attr_type attr_type;
4748
  /* Don't adjust costs for a pa8000 chip.  Also don't adjust any
     true dependencies, as they are now described with bypasses.  */
4751  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4752    return cost;
4753
4754  if (! recog_memoized (insn))
4755    return 0;
4756
4757  attr_type = get_attr_type (insn);
4758
4759  switch (REG_NOTE_KIND (link))
4760    {
4761    case REG_DEP_ANTI:
4762      /* Anti dependency; DEP_INSN reads a register that INSN writes some
4763	 cycles later.  */
4764
4765      if (attr_type == TYPE_FPLOAD)
4766	{
4767	  rtx pat = PATTERN (insn);
4768	  rtx dep_pat = PATTERN (dep_insn);
4769	  if (GET_CODE (pat) == PARALLEL)
4770	    {
4771	      /* This happens for the fldXs,mb patterns.  */
4772	      pat = XVECEXP (pat, 0, 0);
4773	    }
4774	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4775	    /* If this happens, we have to extend this to schedule
4776	       optimally.  Return 0 for now.  */
	    return 0;
4778
4779	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4780	    {
4781	      if (! recog_memoized (dep_insn))
4782		return 0;
4783	      switch (get_attr_type (dep_insn))
4784		{
4785		case TYPE_FPALU:
4786		case TYPE_FPMULSGL:
4787		case TYPE_FPMULDBL:
4788		case TYPE_FPDIVSGL:
4789		case TYPE_FPDIVDBL:
4790		case TYPE_FPSQRTSGL:
4791		case TYPE_FPSQRTDBL:
4792		  /* A fpload can't be issued until one cycle before a
4793		     preceding arithmetic operation has finished if
4794		     the target of the fpload is any of the sources
4795		     (or destination) of the arithmetic operation.  */
4796		  return insn_default_latency (dep_insn) - 1;
4797
4798		default:
4799		  return 0;
4800		}
4801	    }
4802	}
4803      else if (attr_type == TYPE_FPALU)
4804	{
4805	  rtx pat = PATTERN (insn);
4806	  rtx dep_pat = PATTERN (dep_insn);
4807	  if (GET_CODE (pat) == PARALLEL)
4808	    {
4809	      /* This happens for the fldXs,mb patterns.  */
4810	      pat = XVECEXP (pat, 0, 0);
4811	    }
4812	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4813	    /* If this happens, we have to extend this to schedule
4814	       optimally.  Return 0 for now.  */
	    return 0;
4816
4817	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4818	    {
4819	      if (! recog_memoized (dep_insn))
4820		return 0;
4821	      switch (get_attr_type (dep_insn))
4822		{
4823		case TYPE_FPDIVSGL:
4824		case TYPE_FPDIVDBL:
4825		case TYPE_FPSQRTSGL:
4826		case TYPE_FPSQRTDBL:
4827		  /* An ALU flop can't be issued until two cycles before a
4828		     preceding divide or sqrt operation has finished if
4829		     the target of the ALU flop is any of the sources
4830		     (or destination) of the divide or sqrt operation.  */
4831		  return insn_default_latency (dep_insn) - 2;
4832
4833		default:
4834		  return 0;
4835		}
4836	    }
4837	}
4838
4839      /* For other anti dependencies, the cost is 0.  */
4840      return 0;
4841
4842    case REG_DEP_OUTPUT:
4843      /* Output dependency; DEP_INSN writes a register that INSN writes some
4844	 cycles later.  */
4845      if (attr_type == TYPE_FPLOAD)
4846	{
4847	  rtx pat = PATTERN (insn);
4848	  rtx dep_pat = PATTERN (dep_insn);
4849	  if (GET_CODE (pat) == PARALLEL)
4850	    {
4851	      /* This happens for the fldXs,mb patterns.  */
4852	      pat = XVECEXP (pat, 0, 0);
4853	    }
4854	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4855	    /* If this happens, we have to extend this to schedule
4856	       optimally.  Return 0 for now.  */
	    return 0;
4858
4859	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4860	    {
4861	      if (! recog_memoized (dep_insn))
4862		return 0;
4863	      switch (get_attr_type (dep_insn))
4864		{
4865		case TYPE_FPALU:
4866		case TYPE_FPMULSGL:
4867		case TYPE_FPMULDBL:
4868		case TYPE_FPDIVSGL:
4869		case TYPE_FPDIVDBL:
4870		case TYPE_FPSQRTSGL:
4871		case TYPE_FPSQRTDBL:
4872		  /* A fpload can't be issued until one cycle before a
4873		     preceding arithmetic operation has finished if
4874		     the target of the fpload is the destination of the
4875		     arithmetic operation.
4876
		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.  We also
		     pay the penalty if the second insn is a fpload.  */
4880		  return insn_default_latency (dep_insn) - 1;
4881
4882		default:
4883		  return 0;
4884		}
4885	    }
4886	}
4887      else if (attr_type == TYPE_FPALU)
4888	{
4889	  rtx pat = PATTERN (insn);
4890	  rtx dep_pat = PATTERN (dep_insn);
4891	  if (GET_CODE (pat) == PARALLEL)
4892	    {
4893	      /* This happens for the fldXs,mb patterns.  */
4894	      pat = XVECEXP (pat, 0, 0);
4895	    }
4896	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4897	    /* If this happens, we have to extend this to schedule
4898	       optimally.  Return 0 for now.  */
	    return 0;
4900
4901	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4902	    {
4903	      if (! recog_memoized (dep_insn))
4904		return 0;
4905	      switch (get_attr_type (dep_insn))
4906		{
4907		case TYPE_FPDIVSGL:
4908		case TYPE_FPDIVDBL:
4909		case TYPE_FPSQRTSGL:
4910		case TYPE_FPSQRTDBL:
4911		  /* An ALU flop can't be issued until two cycles before a
4912		     preceding divide or sqrt operation has finished if
4913		     the target of the ALU flop is also the target of
4914		     the divide or sqrt operation.  */
4915		  return insn_default_latency (dep_insn) - 2;
4916
4917		default:
4918		  return 0;
4919		}
4920	    }
4921	}
4922
4923      /* For other output dependencies, the cost is 0.  */
4924      return 0;
4925
4926    default:
4927      gcc_unreachable ();
4928    }
4929}
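
/* For instance, assuming an fpalu insn with a default latency of
   three cycles on the modeled CPU, an fpload that is anti-dependent
   on it and targets one of its operands is charged 3 - 1 = 2 cycles
   instead of the usual zero cost of an anti dependency.  */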
4930
4931/* Adjust scheduling priorities.  We use this to try and keep addil
4932   and the next use of %r1 close together.  */
4933static int
4934pa_adjust_priority (rtx_insn *insn, int priority)
4935{
4936  rtx set = single_set (insn);
4937  rtx src, dest;
4938  if (set)
4939    {
4940      src = SET_SRC (set);
4941      dest = SET_DEST (set);
4942      if (GET_CODE (src) == LO_SUM
4943	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4944	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4945	priority >>= 3;
4946
4947      else if (GET_CODE (src) == MEM
4948	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4949	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4950	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4951	priority >>= 1;
4952
4953      else if (GET_CODE (dest) == MEM
4954	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4955	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4956	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4957	priority >>= 3;
4958    }
4959  return priority;
4960}
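
/* For example, a store through a LO_SUM address of a writable symbol
   (the second half of an addil-style sequence) has its priority
   divided by 8, which tends to keep it adjacent to the addil that
   left the high part of the address in %r1.  */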
4961
4962/* The 700 can only issue a single insn at a time.
4963   The 7XXX processors can issue two insns at a time.
4964   The 8000 can issue 4 insns at a time.  */
4965static int
4966pa_issue_rate (void)
4967{
4968  switch (pa_cpu)
4969    {
4970    case PROCESSOR_700:		return 1;
4971    case PROCESSOR_7100:	return 2;
4972    case PROCESSOR_7100LC:	return 2;
4973    case PROCESSOR_7200:	return 2;
4974    case PROCESSOR_7300:	return 2;
4975    case PROCESSOR_8000:	return 4;
4976
4977    default:
4978      gcc_unreachable ();
4979    }
4980}
4981
4982
4983
/* Return the adjusted length needed by INSN, whose length has already
   been computed as LENGTH.  Return LENGTH if no adjustment is
   necessary.
4987
4988   Also compute the length of an inline block move here as it is too
4989   complicated to express as a length attribute in pa.md.  */
4990int
4991pa_adjust_insn_length (rtx_insn *insn, int length)
4992{
4993  rtx pat = PATTERN (insn);
4994
4995  /* If length is negative or undefined, provide initial length.  */
4996  if ((unsigned int) length >= INT_MAX)
4997    {
4998      if (GET_CODE (pat) == SEQUENCE)
4999	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5000
5001      switch (get_attr_type (insn))
5002	{
5003	case TYPE_MILLI:
5004	  length = pa_attr_length_millicode_call (insn);
5005	  break;
5006	case TYPE_CALL:
5007	  length = pa_attr_length_call (insn, 0);
5008	  break;
5009	case TYPE_SIBCALL:
5010	  length = pa_attr_length_call (insn, 1);
5011	  break;
5012	case TYPE_DYNCALL:
5013	  length = pa_attr_length_indirect_call (insn);
5014	  break;
5015	case TYPE_SH_FUNC_ADRS:
5016	  length = pa_attr_length_millicode_call (insn) + 20;
5017	  break;
5018	default:
5019	  gcc_unreachable ();
5020	}
5021    }
5022
5023  /* Block move pattern.  */
5024  if (NONJUMP_INSN_P (insn)
5025      && GET_CODE (pat) == PARALLEL
5026      && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5027      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5028      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5029      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5030      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5031    length += compute_movmem_length (insn) - 4;
5032  /* Block clear pattern.  */
5033  else if (NONJUMP_INSN_P (insn)
5034	   && GET_CODE (pat) == PARALLEL
5035	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5036	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5037	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5038	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5039    length += compute_clrmem_length (insn) - 4;
5040  /* Conditional branch with an unfilled delay slot.  */
5041  else if (JUMP_P (insn) && ! simplejump_p (insn))
5042    {
5043      /* Adjust a short backwards conditional with an unfilled delay slot.  */
5044      if (GET_CODE (pat) == SET
5045	  && length == 4
5046	  && JUMP_LABEL (insn) != NULL_RTX
5047	  && ! forward_branch_p (insn))
5048	length += 4;
5049      else if (GET_CODE (pat) == PARALLEL
5050	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5051	       && length == 4)
5052	length += 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for the case where the counter is
	 in a general register.  */
5056      else if (GET_CODE (pat) == PARALLEL
5057	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5058	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5059 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5060	       && length == 4
5061	       && ! forward_branch_p (insn))
5062	length += 4;
5063    }
5064  return length;
5065}
5066
5067/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5068
5069static bool
5070pa_print_operand_punct_valid_p (unsigned char code)
5071{
5072  if (code == '@'
5073      || code == '#'
5074      || code == '*'
5075      || code == '^')
5076    return true;
5077
5078  return false;
5079}
5080
5081/* Print operand X (an rtx) in assembler syntax to file FILE.
5082   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5083   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5084
5085void
5086pa_print_operand (FILE *file, rtx x, int code)
5087{
5088  switch (code)
5089    {
5090    case '#':
5091      /* Output a 'nop' if there's nothing for the delay slot.  */
5092      if (dbr_sequence_length () == 0)
5093	fputs ("\n\tnop", file);
5094      return;
5095    case '*':
      /* Output a nullification completer if there's nothing for the
	 delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0
	  || (final_sequence
	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5101        fputs (",n", file);
5102      return;
5103    case 'R':
5104      /* Print out the second register name of a register pair.
5105	 I.e., R (6) => 7.  */
5106      fputs (reg_names[REGNO (x) + 1], file);
5107      return;
5108    case 'r':
5109      /* A register or zero.  */
5110      if (x == const0_rtx
5111	  || (x == CONST0_RTX (DFmode))
5112	  || (x == CONST0_RTX (SFmode)))
5113	{
5114	  fputs ("%r0", file);
5115	  return;
5116	}
5117      else
5118	break;
5119    case 'f':
5120      /* A register or zero (floating point).  */
5121      if (x == const0_rtx
5122	  || (x == CONST0_RTX (DFmode))
5123	  || (x == CONST0_RTX (SFmode)))
5124	{
5125	  fputs ("%fr0", file);
5126	  return;
5127	}
5128      else
5129	break;
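    /* 'A' expects a (mem (lo_sum (reg) (unspec [symbol]))) operand;
       print the symbol's global address followed by the base register
       in parentheses, e.g. "foo-$global$(%r1)".  */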
5130    case 'A':
5131      {
5132	rtx xoperands[2];
5133
5134	xoperands[0] = XEXP (XEXP (x, 0), 0);
5135	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5136	pa_output_global_address (file, xoperands[1], 0);
5137        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5138	return;
5139      }
5140
5141    case 'C':			/* Plain (C)ondition */
5142    case 'X':
5143      switch (GET_CODE (x))
5144	{
5145	case EQ:
5146	  fputs ("=", file);  break;
5147	case NE:
5148	  fputs ("<>", file);  break;
5149	case GT:
5150	  fputs (">", file);  break;
5151	case GE:
5152	  fputs (">=", file);  break;
5153	case GEU:
5154	  fputs (">>=", file);  break;
5155	case GTU:
5156	  fputs (">>", file);  break;
5157	case LT:
5158	  fputs ("<", file);  break;
5159	case LE:
5160	  fputs ("<=", file);  break;
5161	case LEU:
5162	  fputs ("<<=", file);  break;
5163	case LTU:
5164	  fputs ("<<", file);  break;
5165	default:
5166	  gcc_unreachable ();
5167	}
5168      return;
5169    case 'N':			/* Condition, (N)egated */
5170      switch (GET_CODE (x))
5171	{
5172	case EQ:
5173	  fputs ("<>", file);  break;
5174	case NE:
5175	  fputs ("=", file);  break;
5176	case GT:
5177	  fputs ("<=", file);  break;
5178	case GE:
5179	  fputs ("<", file);  break;
5180	case GEU:
5181	  fputs ("<<", file);  break;
5182	case GTU:
5183	  fputs ("<<=", file);  break;
5184	case LT:
5185	  fputs (">=", file);  break;
5186	case LE:
5187	  fputs (">", file);  break;
5188	case LEU:
5189	  fputs (">>", file);  break;
5190	case LTU:
5191	  fputs (">>=", file);  break;
5192	default:
5193	  gcc_unreachable ();
5194	}
5195      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired condition.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
5201    case 'Y':
5202      switch (GET_CODE (x))
5203	{
5204	case EQ:
5205	  fputs ("!=", file);  break;
5206	case NE:
5207	  fputs ("=", file);  break;
5208	case GT:
5209	  fputs ("!>", file);  break;
5210	case GE:
5211	  fputs ("!>=", file);  break;
5212	case LT:
5213	  fputs ("!<", file);  break;
5214	case LE:
5215	  fputs ("!<=", file);  break;
5216	case LTGT:
5217	  fputs ("!<>", file);  break;
5218	case UNLE:
5219	  fputs ("!?<=", file);  break;
5220	case UNLT:
5221	  fputs ("!?<", file);  break;
5222	case UNGE:
5223	  fputs ("!?>=", file);  break;
5224	case UNGT:
5225	  fputs ("!?>", file);  break;
5226	case UNEQ:
5227	  fputs ("!?=", file);  break;
5228	case UNORDERED:
5229	  fputs ("!?", file);  break;
5230	case ORDERED:
5231	  fputs ("?", file);  break;
5232	default:
5233	  gcc_unreachable ();
5234	}
5235      return;
5236    case 'S':			/* Condition, operands are (S)wapped.  */
5237      switch (GET_CODE (x))
5238	{
5239	case EQ:
5240	  fputs ("=", file);  break;
5241	case NE:
5242	  fputs ("<>", file);  break;
5243	case GT:
5244	  fputs ("<", file);  break;
5245	case GE:
5246	  fputs ("<=", file);  break;
5247	case GEU:
5248	  fputs ("<<=", file);  break;
5249	case GTU:
5250	  fputs ("<<", file);  break;
5251	case LT:
5252	  fputs (">", file);  break;
5253	case LE:
5254	  fputs (">=", file);  break;
5255	case LEU:
5256	  fputs (">>=", file);  break;
5257	case LTU:
5258	  fputs (">>", file);  break;
5259	default:
5260	  gcc_unreachable ();
5261	}
5262      return;
5263    case 'B':			/* Condition, (B)oth swapped and negate.  */
5264      switch (GET_CODE (x))
5265	{
5266	case EQ:
5267	  fputs ("<>", file);  break;
5268	case NE:
5269	  fputs ("=", file);  break;
5270	case GT:
5271	  fputs (">=", file);  break;
5272	case GE:
5273	  fputs (">", file);  break;
5274	case GEU:
5275	  fputs (">>", file);  break;
5276	case GTU:
5277	  fputs (">>=", file);  break;
5278	case LT:
5279	  fputs ("<=", file);  break;
5280	case LE:
5281	  fputs ("<", file);  break;
5282	case LEU:
5283	  fputs ("<<", file);  break;
5284	case LTU:
5285	  fputs ("<<=", file);  break;
5286	default:
5287	  gcc_unreachable ();
5288	}
5289      return;
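    /* The codes below print transformed CONST_INT operands: 'k' prints
       the ones' complement, 'Q'/'L' print 64/32 minus the value, 'O'
       prints the base-2 logarithm of the (power-of-two) value, and
       'p'/'P' print 63/31 minus the value, as expected by the shift,
       extract and deposit instructions.  */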
5290    case 'k':
5291      gcc_assert (GET_CODE (x) == CONST_INT);
5292      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5293      return;
5294    case 'Q':
5295      gcc_assert (GET_CODE (x) == CONST_INT);
5296      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5297      return;
5298    case 'L':
5299      gcc_assert (GET_CODE (x) == CONST_INT);
5300      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5301      return;
5302    case 'O':
5303      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5304      fprintf (file, "%d", exact_log2 (INTVAL (x)));
5305      return;
5306    case 'p':
5307      gcc_assert (GET_CODE (x) == CONST_INT);
5308      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5309      return;
5310    case 'P':
5311      gcc_assert (GET_CODE (x) == CONST_INT);
5312      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5313      return;
5314    case 'I':
5315      if (GET_CODE (x) == CONST_INT)
5316	fputs ("i", file);
5317      return;
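    /* 'M' and 'F' print the opcode completer for a memory operand's
       addressing mode: ",mb"/",ma" for pre/post modification, "x" for
       indexed and ",s" for scaled-index forms.  Under the old HP
       assembler dialect an "s" opcode suffix is printed as well, for
       'F' even in the plain register + displacement case.  */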
5318    case 'M':
5319    case 'F':
5320      switch (GET_CODE (XEXP (x, 0)))
5321	{
5322	case PRE_DEC:
5323	case PRE_INC:
5324	  if (ASSEMBLER_DIALECT == 0)
5325	    fputs ("s,mb", file);
5326	  else
5327	    fputs (",mb", file);
5328	  break;
5329	case POST_DEC:
5330	case POST_INC:
5331	  if (ASSEMBLER_DIALECT == 0)
5332	    fputs ("s,ma", file);
5333	  else
5334	    fputs (",ma", file);
5335	  break;
5336	case PLUS:
5337	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5338	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5339	    {
5340	      if (ASSEMBLER_DIALECT == 0)
5341		fputs ("x", file);
5342	    }
5343	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5344		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5345	    {
5346	      if (ASSEMBLER_DIALECT == 0)
5347		fputs ("x,s", file);
5348	      else
5349		fputs (",s", file);
5350	    }
5351	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5352	    fputs ("s", file);
5353	  break;
5354	default:
5355	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5356	    fputs ("s", file);
5357	  break;
5358	}
5359      return;
5360    case 'G':
5361      pa_output_global_address (file, x, 0);
5362      return;
5363    case 'H':
5364      pa_output_global_address (file, x, 1);
5365      return;
5366    case 0:			/* Don't do anything special */
5367      break;
5368    case 'Z':
5369      {
5370	unsigned op[3];
5371	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5373	return;
5374      }
5375    case 'z':
5376      {
5377	unsigned op[3];
5378	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5380	return;
5381      }
5382    case 'c':
5383      /* We can get here from a .vtable_inherit due to our
5384	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5385	 addresses.  */
5386      break;
5387    default:
5388      gcc_unreachable ();
5389    }
5390  if (GET_CODE (x) == REG)
5391    {
5392      fputs (reg_names [REGNO (x)], file);
5393      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5394	{
5395	  fputs ("R", file);
5396	  return;
5397	}
5398      if (FP_REG_P (x)
5399	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5400	  && (REGNO (x) & 1) == 0)
5401	fputs ("L", file);
5402    }
5403  else if (GET_CODE (x) == MEM)
5404    {
5405      int size = GET_MODE_SIZE (GET_MODE (x));
5406      rtx base = NULL_RTX;
5407      switch (GET_CODE (XEXP (x, 0)))
5408	{
5409	case PRE_DEC:
5410	case POST_DEC:
5411          base = XEXP (XEXP (x, 0), 0);
5412	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5413	  break;
5414	case PRE_INC:
5415	case POST_INC:
5416          base = XEXP (XEXP (x, 0), 0);
5417	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5418	  break;
5419	case PLUS:
5420	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5421	    fprintf (file, "%s(%s)",
5422		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5423		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5424	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5425	    fprintf (file, "%s(%s)",
5426		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5427		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5428	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5429		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5430	    {
5431	      /* Because the REG_POINTER flag can get lost during reload,
5432		 pa_legitimate_address_p canonicalizes the order of the
5433		 index and base registers in the combined move patterns.  */
5434	      rtx base = XEXP (XEXP (x, 0), 1);
5435	      rtx index = XEXP (XEXP (x, 0), 0);
5436
5437	      fprintf (file, "%s(%s)",
5438		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5439	    }
5440	  else
5441	    output_address (XEXP (x, 0));
5442	  break;
5443	default:
5444	  output_address (XEXP (x, 0));
5445	  break;
5446	}
5447    }
5448  else
5449    output_addr_const (file, x);
5450}
5451
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
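/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is
   printed as "foo-$global$+4" when "foo" is writable and we are not
   generating PIC code.  */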
5453
5454void
5455pa_output_global_address (FILE *file, rtx x, int round_constant)
{
  /* Imagine (high (const (plus ...))).  */
5459  if (GET_CODE (x) == HIGH)
5460    x = XEXP (x, 0);
5461
5462  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5463    output_addr_const (file, x);
5464  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5465    {
5466      output_addr_const (file, x);
5467      fputs ("-$global$", file);
5468    }
5469  else if (GET_CODE (x) == CONST)
5470    {
5471      const char *sep = "";
5472      int offset = 0;		/* assembler wants -$global$ at end */
5473      rtx base = NULL_RTX;
5474
5475      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5476	{
5477	case LABEL_REF:
5478	case SYMBOL_REF:
5479	  base = XEXP (XEXP (x, 0), 0);
5480	  output_addr_const (file, base);
5481	  break;
5482	case CONST_INT:
5483	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5484	  break;
5485	default:
5486	  gcc_unreachable ();
5487	}
5488
5489      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5490	{
5491	case LABEL_REF:
5492	case SYMBOL_REF:
5493	  base = XEXP (XEXP (x, 0), 1);
5494	  output_addr_const (file, base);
5495	  break;
5496	case CONST_INT:
5497	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5498	  break;
5499	default:
5500	  gcc_unreachable ();
5501	}
5502
5503      /* How bogus.  The compiler is apparently responsible for
5504	 rounding the constant if it uses an LR field selector.
5505
5506	 The linker and/or assembler seem a better place since
5507	 they have to do this kind of thing already.
5508
5509	 If we fail to do this, HP's optimizing linker may eliminate
5510	 an addil, but not update the ldw/stw/ldo instruction that
5511	 uses the result of the addil.  */
5512      if (round_constant)
5513	offset = ((offset + 0x1000) & ~0x1fff);
5514
5515      switch (GET_CODE (XEXP (x, 0)))
5516	{
5517	case PLUS:
5518	  if (offset < 0)
5519	    {
5520	      offset = -offset;
5521	      sep = "-";
5522	    }
5523	  else
5524	    sep = "+";
5525	  break;
5526
5527	case MINUS:
5528	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5529	  sep = "-";
5530	  break;
5531
5532	default:
5533	  gcc_unreachable ();
5534	}
5535
5536      if (!read_only_operand (base, VOIDmode) && !flag_pic)
5537	fputs ("-$global$", file);
5538      if (offset)
5539	fprintf (file, "%s%d", sep, offset);
5540    }
5541  else
5542    output_addr_const (file, x);
5543}
5544
5545/* Output boilerplate text to appear at the beginning of the file.
5546   There are several possible versions.  */
5547#define aputs(x) fputs(x, asm_out_file)
5548static inline void
5549pa_file_start_level (void)
5550{
5551  if (TARGET_64BIT)
5552    aputs ("\t.LEVEL 2.0w\n");
5553  else if (TARGET_PA_20)
5554    aputs ("\t.LEVEL 2.0\n");
5555  else if (TARGET_PA_11)
5556    aputs ("\t.LEVEL 1.1\n");
5557  else
5558    aputs ("\t.LEVEL 1.0\n");
5559}
5560
5561static inline void
5562pa_file_start_space (int sortspace)
5563{
5564  aputs ("\t.SPACE $PRIVATE$");
5565  if (sortspace)
5566    aputs (",SORT=16");
5567  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5568  if (flag_tm)
5569    aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5570  aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5571	 "\n\t.SPACE $TEXT$");
5572  if (sortspace)
5573    aputs (",SORT=8");
5574  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5575	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5576}
5577
5578static inline void
5579pa_file_start_file (int want_version)
5580{
5581  if (write_symbols != NO_DEBUG)
5582    {
5583      output_file_directive (asm_out_file, main_input_filename);
5584      if (want_version)
5585	aputs ("\t.version\t\"01.01\"\n");
5586    }
5587}
5588
5589static inline void
5590pa_file_start_mcount (const char *aswhat)
5591{
5592  if (profile_flag)
5593    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5594}
5595
5596static void
5597pa_elf_file_start (void)
5598{
5599  pa_file_start_level ();
5600  pa_file_start_mcount ("ENTRY");
5601  pa_file_start_file (0);
5602}
5603
5604static void
5605pa_som_file_start (void)
5606{
5607  pa_file_start_level ();
5608  pa_file_start_space (0);
5609  aputs ("\t.IMPORT $global$,DATA\n"
5610         "\t.IMPORT $$dyncall,MILLICODE\n");
5611  pa_file_start_mcount ("CODE");
5612  pa_file_start_file (0);
5613}
5614
5615static void
5616pa_linux_file_start (void)
5617{
5618  pa_file_start_file (1);
5619  pa_file_start_level ();
5620  pa_file_start_mcount ("CODE");
5621}
5622
5623static void
5624pa_hpux64_gas_file_start (void)
5625{
5626  pa_file_start_level ();
5627#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5628  if (profile_flag)
5629    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5630#endif
5631  pa_file_start_file (1);
5632}
5633
5634static void
5635pa_hpux64_hpas_file_start (void)
5636{
5637  pa_file_start_level ();
5638  pa_file_start_space (1);
5639  pa_file_start_mcount ("CODE");
5640  pa_file_start_file (0);
5641}
5642#undef aputs
5643
5644/* Search the deferred plabel list for SYMBOL and return its internal
5645   label.  If an entry for SYMBOL is not found, a new entry is created.  */
5646
5647rtx
5648pa_get_deferred_plabel (rtx symbol)
5649{
5650  const char *fname = XSTR (symbol, 0);
5651  size_t i;
5652
  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow, replace it with something faster.  */
5656  for (i = 0; i < n_deferred_plabels; i++)
5657    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5658      break;
5659
5660  /* If the deferred plabel list is empty, or this entry was not found
5661     on the list, create a new entry on the list.  */
5662  if (deferred_plabels == NULL || i == n_deferred_plabels)
5663    {
5664      tree id;
5665
5666      if (deferred_plabels == 0)
	deferred_plabels = ggc_alloc<deferred_plabel> ();
5668      else
5669        deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5670                                          deferred_plabels,
5671                                          n_deferred_plabels + 1);
5672
5673      i = n_deferred_plabels++;
5674      deferred_plabels[i].internal_label = gen_label_rtx ();
5675      deferred_plabels[i].symbol = symbol;
5676
5677      /* Gross.  We have just implicitly taken the address of this
5678	 function.  Mark it in the same manner as assemble_name.  */
5679      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5680      if (id)
5681	mark_referenced (id);
5682    }
5683
5684  return deferred_plabels[i].internal_label;
5685}
5686
5687static void
5688output_deferred_plabels (void)
5689{
5690  size_t i;
5691
  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a word boundary (a
     doubleword boundary in the 64-bit runtime) before outputting the
     deferred plabels.  */
5695  if (n_deferred_plabels)
5696    {
5697      switch_to_section (flag_pic ? data_section : readonly_data_section);
5698      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5699    }
5700
5701  /* Now output the deferred plabels.  */
5702  for (i = 0; i < n_deferred_plabels; i++)
5703    {
5704      targetm.asm_out.internal_label (asm_out_file, "L",
5705		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5706      assemble_integer (deferred_plabels[i].symbol,
5707			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5708    }
5709}
5710
5711/* Initialize optabs to point to emulation routines.  */
5712
5713static void
5714pa_init_libfuncs (void)
5715{
5716  if (HPUX_LONG_DOUBLE_LIBRARY)
5717    {
5718      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5719      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5720      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5721      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5722      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5723      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5724      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5725      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5726      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5727
5728      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5729      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5730      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5731      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5732      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5733      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5734      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5735
5736      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5737      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5738      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5739      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5740
5741      set_conv_libfunc (sfix_optab, SImode, TFmode,
5742			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5743				     : "_U_Qfcnvfxt_quad_to_sgl");
5744      set_conv_libfunc (sfix_optab, DImode, TFmode,
5745			"_U_Qfcnvfxt_quad_to_dbl");
5746      set_conv_libfunc (ufix_optab, SImode, TFmode,
5747			"_U_Qfcnvfxt_quad_to_usgl");
5748      set_conv_libfunc (ufix_optab, DImode, TFmode,
5749			"_U_Qfcnvfxt_quad_to_udbl");
5750
5751      set_conv_libfunc (sfloat_optab, TFmode, SImode,
5752			"_U_Qfcnvxf_sgl_to_quad");
5753      set_conv_libfunc (sfloat_optab, TFmode, DImode,
5754			"_U_Qfcnvxf_dbl_to_quad");
5755      set_conv_libfunc (ufloat_optab, TFmode, SImode,
5756			"_U_Qfcnvxf_usgl_to_quad");
5757      set_conv_libfunc (ufloat_optab, TFmode, DImode,
5758			"_U_Qfcnvxf_udbl_to_quad");
5759    }
5760
5761  if (TARGET_SYNC_LIBCALL)
5762    init_sync_libfuncs (8);
5763}
5764
5765/* HP's millicode routines mean something special to the assembler.
5766   Keep track of which ones we have used.  */
5767
5768enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5769static void import_milli (enum millicodes);
5770static char imported[(int) end1000];
5771static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5772static const char import_string[] = ".IMPORT $$....,MILLICODE";
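/* Offset of the "...." name placeholder in IMPORT_STRING.  */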
5773#define MILLI_START 10
5774
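/* Emit an assembler .IMPORT directive for the millicode routine CODE
   the first time it is used in this file.  For example,
   import_milli (mulI) outputs ".IMPORT $$mulI,MILLICODE".  */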
5775static void
5776import_milli (enum millicodes code)
5777{
5778  char str[sizeof (import_string)];
5779
5780  if (!imported[(int) code])
5781    {
5782      imported[(int) code] = 1;
5783      strcpy (str, import_string);
5784      strncpy (str + MILLI_START, milli_names[(int) code], 4);
5785      output_asm_insn (str, 0);
5786    }
5787}
5788
5789/* The register constraints have put the operands and return value in
5790   the proper registers.  */
5791
5792const char *
5793pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5794{
5795  import_milli (mulI);
5796  return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5797}
5798
5799/* Emit the rtl for doing a division by a constant.  */
5800
5801/* Do magic division millicodes exist for this value? */
const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
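/* That is, magic millicode routines ($$divI_N and $$divU_N) exist for
   divisors 3, 5, 6, 7, 9, 10, 12, 14 and 15.  */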
5803
5804/* We'll use an array to keep track of the magic millicodes and
5805   whether or not we've used them already. [n][0] is signed, [n][1] is
5806   unsigned.  */
5807
5808static int div_milli[16][2];
5809
5810int
5811pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5812{
5813  if (GET_CODE (operands[2]) == CONST_INT
5814      && INTVAL (operands[2]) > 0
5815      && INTVAL (operands[2]) < 16
5816      && pa_magic_milli[INTVAL (operands[2])])
5817    {
5818      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5819
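      /* Millicode division convention: the dividend is passed in %r26,
	 the quotient is returned in %r29, and the return link for the
	 millicode call is %r31 (%r2 in the 64-bit runtime).  */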
5820      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5821      emit
5822	(gen_rtx_PARALLEL
5823	 (VOIDmode,
5824	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5825				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5826						     SImode,
5827						     gen_rtx_REG (SImode, 26),
5828						     operands[2])),
5829		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5830		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5831		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5832		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5833		     gen_rtx_CLOBBER (VOIDmode, ret))));
5834      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5835      return 1;
5836    }
5837  return 0;
5838}
5839
5840const char *
5841pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5842{
5843  int divisor;
5844
  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
5847  if (GET_CODE (operands[0]) == CONST_INT)
5848    {
5849      static char buf[100];
5850      divisor = INTVAL (operands[0]);
5851      if (!div_milli[divisor][unsignedp])
5852	{
5853	  div_milli[divisor][unsignedp] = 1;
5854	  if (unsignedp)
5855	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5856	  else
5857	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5858	}
5859      if (unsignedp)
5860	{
5861	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5862		   INTVAL (operands[0]));
5863	  return pa_output_millicode_call (insn,
5864					   gen_rtx_SYMBOL_REF (SImode, buf));
5865	}
5866      else
5867	{
5868	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5869		   INTVAL (operands[0]));
5870	  return pa_output_millicode_call (insn,
5871					   gen_rtx_SYMBOL_REF (SImode, buf));
5872	}
5873    }
5874  /* Divisor isn't a special constant.  */
5875  else
5876    {
5877      if (unsignedp)
5878	{
5879	  import_milli (divU);
5880	  return pa_output_millicode_call (insn,
5881					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5882	}
5883      else
5884	{
5885	  import_milli (divI);
5886	  return pa_output_millicode_call (insn,
5887					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5888	}
5889    }
5890}
5891
/* Output a $$rem millicode call to compute the remainder (mod).  */
5893
5894const char *
5895pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5896{
5897  if (unsignedp)
5898    {
5899      import_milli (remU);
5900      return pa_output_millicode_call (insn,
5901				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5902    }
5903  else
5904    {
5905      import_milli (remI);
5906      return pa_output_millicode_call (insn,
5907				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5908    }
5909}
5910
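/* Output a .CALL directive describing in which registers the arguments
   of CALL_INSN are passed; the HP SOM linker uses this information for
   argument relocation.  For example, a call passing two integers in
   %r26 and %r25 produces ".CALL ARGW0=GR,ARGW1=GR".  */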
5911void
5912pa_output_arg_descriptor (rtx_insn *call_insn)
5913{
5914  const char *arg_regs[4];
5915  machine_mode arg_mode;
5916  rtx link;
5917  int i, output_flag = 0;
5918  int regno;
5919
  /* We neither need nor want argument location descriptors for the
     64-bit runtime environment or the ELF32 environment.  */
5922  if (TARGET_64BIT || TARGET_ELF32)
5923    return;
5924
5925  for (i = 0; i < 4; i++)
5926    arg_regs[i] = 0;
5927
5928  /* Specify explicitly that no argument relocations should take place
5929     if using the portable runtime calling conventions.  */
5930  if (TARGET_PORTABLE_RUNTIME)
5931    {
5932      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5933	     asm_out_file);
5934      return;
5935    }
5936
5937  gcc_assert (CALL_P (call_insn));
5938  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5939       link; link = XEXP (link, 1))
5940    {
5941      rtx use = XEXP (link, 0);
5942
5943      if (! (GET_CODE (use) == USE
5944	     && GET_CODE (XEXP (use, 0)) == REG
5945	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5946	continue;
5947
5948      arg_mode = GET_MODE (XEXP (use, 0));
5949      regno = REGNO (XEXP (use, 0));
5950      if (regno >= 23 && regno <= 26)
5951	{
5952	  arg_regs[26 - regno] = "GR";
5953	  if (arg_mode == DImode)
5954	    arg_regs[25 - regno] = "GR";
5955	}
5956      else if (regno >= 32 && regno <= 39)
5957	{
5958	  if (arg_mode == SFmode)
5959	    arg_regs[(regno - 32) / 2] = "FR";
5960	  else
5961	    {
5962#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5963	      arg_regs[(regno - 34) / 2] = "FR";
5964	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5965#else
5966	      arg_regs[(regno - 34) / 2] = "FU";
5967	      arg_regs[(regno - 34) / 2 + 1] = "FR";
5968#endif
5969	    }
5970	}
5971    }
5972  fputs ("\t.CALL ", asm_out_file);
5973  for (i = 0; i < 4; i++)
5974    {
5975      if (arg_regs[i])
5976	{
5977	  if (output_flag++)
5978	    fputc (',', asm_out_file);
5979	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5980	}
5981    }
5982  fputc ('\n', asm_out_file);
5983}
5984
5985/* Inform reload about cases where moving X with a mode MODE to or from
5986   a register in RCLASS requires an extra scratch or immediate register.
5987   Return the class needed for the immediate register.  */
5988
5989static reg_class_t
5990pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5991		     machine_mode mode, secondary_reload_info *sri)
5992{
5993  int regno;
5994  enum reg_class rclass = (enum reg_class) rclass_i;
5995
5996  /* Handle the easy stuff first.  */
5997  if (rclass == R1_REGS)
5998    return NO_REGS;
5999
6000  if (REG_P (x))
6001    {
6002      regno = REGNO (x);
6003      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6004	return NO_REGS;
6005    }
6006  else
6007    regno = -1;
6008
  /* If we have something like (mem (mem (...))), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
6012  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6013    return NO_REGS;
6014
6015  /* Trying to load a constant into a FP register during PIC code
6016     generation requires %r1 as a scratch register.  For float modes,
6017     the only legitimate constant is CONST0_RTX.  However, there are
6018     a few patterns that accept constant double operands.  */
6019  if (flag_pic
6020      && FP_REG_CLASS_P (rclass)
6021      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6022    {
6023      switch (mode)
6024	{
6025	case SImode:
6026	  sri->icode = CODE_FOR_reload_insi_r1;
6027	  break;
6028
6029	case DImode:
6030	  sri->icode = CODE_FOR_reload_indi_r1;
6031	  break;
6032
6033	case SFmode:
6034	  sri->icode = CODE_FOR_reload_insf_r1;
6035	  break;
6036
6037	case DFmode:
6038	  sri->icode = CODE_FOR_reload_indf_r1;
6039	  break;
6040
6041	default:
6042	  gcc_unreachable ();
6043	}
6044      return NO_REGS;
6045    }
6046
6047  /* Secondary reloads of symbolic expressions require %r1 as a scratch
6048     register when we're generating PIC code or when the operand isn't
6049     readonly.  */
6050  if (pa_symbolic_expression_p (x))
6051    {
6052      if (GET_CODE (x) == HIGH)
6053	x = XEXP (x, 0);
6054
6055      if (flag_pic || !read_only_operand (x, VOIDmode))
6056	{
6057	  switch (mode)
6058	    {
6059	    case SImode:
6060	      sri->icode = CODE_FOR_reload_insi_r1;
6061	      break;
6062
6063	    case DImode:
6064	      sri->icode = CODE_FOR_reload_indi_r1;
6065	      break;
6066
6067	    default:
6068	      gcc_unreachable ();
6069	    }
6070	  return NO_REGS;
6071	}
6072    }
6073
  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside this function.  */
6076  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6077    regno = true_regnum (x);
6078
6079  /* Handle reloads for floating point loads and stores.  */
6080  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6081      && FP_REG_CLASS_P (rclass))
6082    {
6083      if (MEM_P (x))
6084	{
6085	  x = XEXP (x, 0);
6086
6087	  /* We don't need a secondary reload for indexed memory addresses.
6088
6089	     When INT14_OK_STRICT is true, it might appear that we could
6090	     directly allow register indirect memory addresses.  However,
6091	     this doesn't work because we don't support SUBREGs in
6092	     floating-point register copies and reload doesn't tell us
6093	     when it's going to use a SUBREG.  */
6094	  if (IS_INDEX_ADDR_P (x))
6095	    return NO_REGS;
6096	}
6097
6098      /* Request a secondary reload with a general scratch register
6099	 for everything else.  ??? Could symbolic operands be handled
6100	 directly when generating non-pic PA 2.0 code?  */
6101      sri->icode = (in_p
6102		    ? direct_optab_handler (reload_in_optab, mode)
6103		    : direct_optab_handler (reload_out_optab, mode));
6104      return NO_REGS;
6105    }
6106
6107  /* A SAR<->FP register copy requires an intermediate general register
6108     and secondary memory.  We need a secondary reload with a general
6109     scratch register for spills.  */
6110  if (rclass == SHIFT_REGS)
6111    {
6112      /* Handle spill.  */
6113      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6114	{
6115	  sri->icode = (in_p
6116			? direct_optab_handler (reload_in_optab, mode)
6117			: direct_optab_handler (reload_out_optab, mode));
6118	  return NO_REGS;
6119	}
6120
6121      /* Handle FP copy.  */
6122      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6123	return GENERAL_REGS;
6124    }
6125
6126  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6127      && REGNO_REG_CLASS (regno) == SHIFT_REGS
6128      && FP_REG_CLASS_P (rclass))
6129    return GENERAL_REGS;
6130
6131  return NO_REGS;
6132}
6133
6134/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6135   is only marked as live on entry by df-scan when it is a fixed
6136   register.  It isn't a fixed register in the 64-bit runtime,
6137   so we need to mark it here.  */
6138
6139static void
6140pa_extra_live_on_entry (bitmap regs)
6141{
6142  if (TARGET_64BIT)
6143    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6144}
6145
6146/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6147   to prevent it from being deleted.  */
6148
6149rtx
6150pa_eh_return_handler_rtx (void)
6151{
6152  rtx tmp;
6153
6154  tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6155		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6156  tmp = gen_rtx_MEM (word_mode, tmp);
  MEM_VOLATILE_P (tmp) = 1;
6158  return tmp;
6159}
6160
6161/* In the 32-bit runtime, arguments larger than eight bytes are passed
6162   by invisible reference.  As a GCC extension, we also pass anything
6163   with a zero or variable size by reference.
6164
6165   The 64-bit runtime does not describe passing any types by invisible
6166   reference.  The internals of GCC can't currently handle passing
6167   empty structures, and zero or variable length arrays when they are
6168   not passed entirely on the stack or by reference.  Thus, as a GCC
6169   extension, we pass these types by reference.  The HP compiler doesn't
6170   support these types, so hopefully there shouldn't be any compatibility
6171   issues.  This may have to be revisited when HP releases a C99 compiler
6172   or updates the ABI.  */
6173
6174static bool
6175pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6176		      machine_mode mode, const_tree type,
6177		      bool named ATTRIBUTE_UNUSED)
6178{
6179  HOST_WIDE_INT size;
6180
6181  if (type)
6182    size = int_size_in_bytes (type);
6183  else
6184    size = GET_MODE_SIZE (mode);
6185
6186  if (TARGET_64BIT)
6187    return size <= 0;
6188  else
6189    return size <= 0 || size > 8;
6190}
6191
6192enum direction
6193pa_function_arg_padding (machine_mode mode, const_tree type)
6194{
6195  if (mode == BLKmode
6196      || (TARGET_64BIT
6197	  && type
6198	  && (AGGREGATE_TYPE_P (type)
6199	      || TREE_CODE (type) == COMPLEX_TYPE
6200	      || TREE_CODE (type) == VECTOR_TYPE)))
6201    {
6202      /* Return none if justification is not required.  */
6203      if (type
6204	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6205	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6206	return none;
6207
6208      /* The directions set here are ignored when a BLKmode argument larger
6209	 than a word is placed in a register.  Different code is used for
6210	 the stack and registers.  This makes it difficult to have a
6211	 consistent data representation for both the stack and registers.
6212	 For both runtimes, the justification and padding for arguments on
6213	 the stack and in registers should be identical.  */
6214      if (TARGET_64BIT)
6215	/* The 64-bit runtime specifies left justification for aggregates.  */
6216        return upward;
6217      else
6218	/* The 32-bit runtime architecture specifies right justification.
6219	   When the argument is passed on the stack, the argument is padded
6220	   with garbage on the left.  The HP compiler pads with zeros.  */
6221	return downward;
6222    }
6223
6224  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6225    return downward;
6226  else
6227    return none;
6228}
6229
6230
6231/* Do what is necessary for `va_start'.  We look at the current function
6232   to determine if stdargs or varargs is used and fill in an initial
6233   va_list.  A pointer to this constructor is returned.  */
6234
6235static rtx
6236hppa_builtin_saveregs (void)
6237{
6238  rtx offset, dest;
6239  tree fntype = TREE_TYPE (current_function_decl);
6240  int argadj = ((!stdarg_p (fntype))
6241		? UNITS_PER_WORD : 0);
6242
6243  if (argadj)
6244    offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6245  else
6246    offset = crtl->args.arg_offset_rtx;
6247
6248  if (TARGET_64BIT)
6249    {
6250      int i, off;
6251
6252      /* Adjust for varargs/stdarg differences.  */
6253      if (argadj)
6254	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6255      else
6256	offset = crtl->args.arg_offset_rtx;
6257
6258      /* We need to save %r26 .. %r19 inclusive starting at offset -64
6259	 from the incoming arg pointer and growing to larger addresses.  */
6260      for (i = 26, off = -64; i >= 19; i--, off += 8)
6261	emit_move_insn (gen_rtx_MEM (word_mode,
6262				     plus_constant (Pmode,
6263						    arg_pointer_rtx, off)),
6264			gen_rtx_REG (word_mode, i));
6265
6266      /* The incoming args pointer points just beyond the flushback area;
6267	 normally this is not a serious concern.  However, when we are doing
6268	 varargs/stdargs we want to make the arg pointer point to the start
6269	 of the incoming argument area.  */
6270      emit_move_insn (virtual_incoming_args_rtx,
6271		      plus_constant (Pmode, arg_pointer_rtx, -64));
6272
6273      /* Now return a pointer to the first anonymous argument.  */
6274      return copy_to_reg (expand_binop (Pmode, add_optab,
6275					virtual_incoming_args_rtx,
6276					offset, 0, 0, OPTAB_LIB_WIDEN));
6277    }
6278
  /* Store the four argument registers on the stack: %r23 ends up at
     internal_arg_pointer - 16 and %r26 (the first argument) at
     internal_arg_pointer - 4, matching their stack argument slots.  */
6280  dest = gen_rtx_MEM (BLKmode,
6281		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6282				     -16));
6283  set_mem_alias_set (dest, get_varargs_alias_set ());
6284  set_mem_align (dest, BITS_PER_WORD);
6285  move_block_from_reg (23, dest, 4);
6286
6287  /* move_block_from_reg will emit code to store the argument registers
6288     individually as scalar stores.
6289
6290     However, other insns may later load from the same addresses for
6291     a structure load (passing a struct to a varargs routine).
6292
6293     The alias code assumes that such aliasing can never happen, so we
6294     have to keep memory referencing insns from moving up beyond the
6295     last argument register store.  So we emit a blockage insn here.  */
6296  emit_insn (gen_blockage ());
6297
6298  return copy_to_reg (expand_binop (Pmode, add_optab,
6299				    crtl->args.internal_arg_pointer,
6300				    offset, 0, 0, OPTAB_LIB_WIDEN));
6301}
6302
6303static void
6304hppa_va_start (tree valist, rtx nextarg)
6305{
6306  nextarg = expand_builtin_saveregs ();
6307  std_expand_builtin_va_start (valist, nextarg);
6308}
6309
6310static tree
6311hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6312			   gimple_seq *post_p)
6313{
6314  if (TARGET_64BIT)
6315    {
6316      /* Args grow upward.  We can use the generic routines.  */
6317      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6318    }
6319  else /* !TARGET_64BIT */
6320    {
6321      tree ptr = build_pointer_type (type);
6322      tree valist_type;
6323      tree t, u;
6324      unsigned int size, ofs;
6325      bool indirect;
6326
6327      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6328      if (indirect)
6329	{
6330	  type = ptr;
6331	  ptr = build_pointer_type (type);
6332	}
6333      size = int_size_in_bytes (type);
6334      valist_type = TREE_TYPE (valist);
6335
6336      /* Args grow down.  Not handled by generic routines.  */
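      /* For example, for a 2-byte short, valist is decremented by the
	 size and rounded down to a 4-byte boundary, t = (valist - 2) & ~3;
	 the offset (8 - 2) % 4 == 2 is then added so the value is read
	 right-justified from its word, i.e. at ((valist - 2) & ~3) + 2.  */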
6337
6338      u = fold_convert (sizetype, size_in_bytes (type));
6339      u = fold_build1 (NEGATE_EXPR, sizetype, u);
6340      t = fold_build_pointer_plus (valist, u);
6341
6342      /* Align to 4 or 8 byte boundary depending on argument size.  */
6343
6344      u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6345      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6346      t = fold_convert (valist_type, t);
6347
6348      t = build2 (MODIFY_EXPR, valist_type, valist, t);
6349
6350      ofs = (8 - size) % 4;
6351      if (ofs != 0)
6352	t = fold_build_pointer_plus_hwi (t, ofs);
6353
6354      t = fold_convert (ptr, t);
6355      t = build_va_arg_indirect_ref (t);
6356
6357      if (indirect)
6358	t = build_va_arg_indirect_ref (t);
6359
6360      return t;
6361    }
6362}
6363
6364/* True if MODE is valid for the target.  By "valid", we mean able to
6365   be manipulated in non-trivial ways.  In particular, this means all
6366   the arithmetic is supported.
6367
   Currently, TImode is not valid because the HP 64-bit runtime does not
   document the alignment and calling conventions for this type.  Thus,
   we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6372
6373static bool
6374pa_scalar_mode_supported_p (machine_mode mode)
6375{
6376  int precision = GET_MODE_PRECISION (mode);
6377
6378  switch (GET_MODE_CLASS (mode))
6379    {
6380    case MODE_PARTIAL_INT:
6381    case MODE_INT:
6382      if (precision == CHAR_TYPE_SIZE)
6383	return true;
6384      if (precision == SHORT_TYPE_SIZE)
6385	return true;
6386      if (precision == INT_TYPE_SIZE)
6387	return true;
6388      if (precision == LONG_TYPE_SIZE)
6389	return true;
6390      if (precision == LONG_LONG_TYPE_SIZE)
6391	return true;
6392      return false;
6393
6394    case MODE_FLOAT:
6395      if (precision == FLOAT_TYPE_SIZE)
6396	return true;
6397      if (precision == DOUBLE_TYPE_SIZE)
6398	return true;
6399      if (precision == LONG_DOUBLE_TYPE_SIZE)
6400	return true;
6401      return false;
6402
6403    case MODE_DECIMAL_FLOAT:
6404      return false;
6405
6406    default:
6407      gcc_unreachable ();
6408    }
6409}
6410
6411/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6412   it branches into the delay slot.  Otherwise, return FALSE.  */
6413
6414static bool
6415branch_to_delay_slot_p (rtx_insn *insn)
6416{
6417  rtx_insn *jump_insn;
6418
6419  if (dbr_sequence_length ())
6420    return FALSE;
6421
6422  jump_insn = next_active_insn (JUMP_LABEL (insn));
6423  while (insn)
6424    {
6425      insn = next_active_insn (insn);
6426      if (jump_insn == insn)
6427	return TRUE;
6428
6429      /* We can't rely on the length of asms.  So, we return FALSE when
6430	 the branch is followed by an asm.  */
6431      if (!insn
6432	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6433	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6434	  || get_attr_length (insn) > 0)
6435	break;
6436    }
6437
6438  return FALSE;
6439}
6440
6441/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6442
6443   This occurs when INSN has an unfilled delay slot and is followed
6444   by an asm.  Disaster can occur if the asm is empty and the jump
6445   branches into the delay slot.  So, we add a nop in the delay slot
6446   when this occurs.  */
6447
6448static bool
6449branch_needs_nop_p (rtx_insn *insn)
6450{
6451  rtx_insn *jump_insn;
6452
6453  if (dbr_sequence_length ())
6454    return FALSE;
6455
6456  jump_insn = next_active_insn (JUMP_LABEL (insn));
6457  while (insn)
6458    {
6459      insn = next_active_insn (insn);
6460      if (!insn || jump_insn == insn)
6461	return TRUE;
6462
6463      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6464	   || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6465	  && get_attr_length (insn) > 0)
6466	break;
6467    }
6468
6469  return FALSE;
6470}
6471
6472/* Return TRUE if INSN, a forward jump insn, can use nullification
6473   to skip the following instruction.  This avoids an extra cycle due
6474   to a mis-predicted branch when we fall through.  */
6475
6476static bool
6477use_skip_p (rtx_insn *insn)
6478{
6479  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6480
6481  while (insn)
6482    {
6483      insn = next_active_insn (insn);
6484
6485      /* We can't rely on the length of asms, so we can't skip asms.  */
6486      if (!insn
6487	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6488	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6489	break;
6490      if (get_attr_length (insn) == 4
6491	  && jump_insn == next_active_insn (insn))
6492	return TRUE;
6493      if (get_attr_length (insn) > 0)
6494	break;
6495    }
6496
6497  return FALSE;
6498}
6499
6500/* This routine handles all the normal conditional branch sequences we
6501   might need to generate.  It handles compare immediate vs compare
6502   register, nullification of delay slots, varying length branches,
6503   negated branches, and all combinations of the above.  It returns the
6504   output appropriate to emit the branch corresponding to all given
6505   parameters.  */
6506
6507const char *
6508pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6509{
6510  static char buf[100];
6511  bool useskip;
6512  int nullify = INSN_ANNULLED_BRANCH_P (insn);
6513  int length = get_attr_length (insn);
6514  int xdelay;
6515
6516  /* A conditional branch to the following instruction (e.g. the delay slot)
6517     is asking for a disaster.  This can happen when not optimizing and
6518     when jump optimization fails.
6519
6520     While it is usually safe to emit nothing, this can fail if the
6521     preceding instruction is a nullified branch with an empty delay
6522     slot and the same branch target as this branch.  We could check
6523     for this but jump optimization should eliminate nop jumps.  It
6524     is always safe to emit a nop.  */
6525  if (branch_to_delay_slot_p (insn))
6526    return "nop";
6527
6528  /* The doubleword form of the cmpib instruction doesn't have the LEU
6529     and GTU conditions while the cmpb instruction does.  Since we accept
6530     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6531  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6532    operands[2] = gen_rtx_REG (DImode, 0);
6533  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6534    operands[1] = gen_rtx_REG (DImode, 0);
6535
6536  /* If this is a long branch with its delay slot unfilled, set `nullify'
6537     as it can nullify the delay slot and save a nop.  */
6538  if (length == 8 && dbr_sequence_length () == 0)
6539    nullify = 1;
6540
6541  /* If this is a short forward conditional branch which did not get
6542     its delay slot filled, the delay slot can still be nullified.  */
6543  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6544    nullify = forward_branch_p (insn);
6545
  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
6549  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6550
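  /* In the templates below, the "{old|new}" construct selects between
     the PA 1.x ("com...") and PA 2.0 ("cmp...") mnemonics according to
     the assembler dialect in use.  */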
6551  switch (length)
6552    {
6553      /* All short conditional branches except backwards with an unfilled
6554	 delay slot.  */
6555      case 4:
6556	if (useskip)
6557	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6558	else
6559	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6560	if (GET_MODE (operands[1]) == DImode)
6561	  strcat (buf, "*");
6562	if (negated)
6563	  strcat (buf, "%B3");
6564	else
6565	  strcat (buf, "%S3");
6566	if (useskip)
6567	  strcat (buf, " %2,%r1,%%r0");
6568	else if (nullify)
6569	  {
6570	    if (branch_needs_nop_p (insn))
6571	      strcat (buf, ",n %2,%r1,%0%#");
6572	    else
6573	      strcat (buf, ",n %2,%r1,%0");
6574	  }
6575	else
6576	  strcat (buf, " %2,%r1,%0");
6577	break;
6578
6579     /* All long conditionals.  Note a short backward branch with an
6580	unfilled delay slot is treated just like a long backward branch
6581	with an unfilled delay slot.  */
6582      case 8:
6583	/* Handle weird backwards branch with a filled delay slot
6584	   which is nullified.  */
6585	if (dbr_sequence_length () != 0
6586	    && ! forward_branch_p (insn)
6587	    && nullify)
6588	  {
6589	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6590	    if (GET_MODE (operands[1]) == DImode)
6591	      strcat (buf, "*");
6592	    if (negated)
6593	      strcat (buf, "%S3");
6594	    else
6595	      strcat (buf, "%B3");
6596	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6597	  }
6598	/* Handle short backwards branch with an unfilled delay slot.
6599	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6600	   taken and untaken branches.  */
6601	else if (dbr_sequence_length () == 0
6602		 && ! forward_branch_p (insn)
6603		 && INSN_ADDRESSES_SET_P ()
6604		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6605				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6606	  {
6607	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6608	    if (GET_MODE (operands[1]) == DImode)
6609	      strcat (buf, "*");
6610	    if (negated)
6611	      strcat (buf, "%B3 %2,%r1,%0%#");
6612	    else
6613	      strcat (buf, "%S3 %2,%r1,%0%#");
6614	  }
6615	else
6616	  {
6617	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6618	    if (GET_MODE (operands[1]) == DImode)
6619	      strcat (buf, "*");
6620	    if (negated)
6621	      strcat (buf, "%S3");
6622	    else
6623	      strcat (buf, "%B3");
6624	    if (nullify)
6625	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6626	    else
6627	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6628	  }
6629	break;
6630
6631      default:
6632	/* The reversed conditional branch must branch over one additional
6633	   instruction if the delay slot is filled and needs to be extracted
6634	   by pa_output_lbranch.  If the delay slot is empty or this is a
6635	   nullified forward branch, the instruction after the reversed
6636	   condition branch must be nullified.  */
6637	if (dbr_sequence_length () == 0
6638	    || (nullify && forward_branch_p (insn)))
6639	  {
6640	    nullify = 1;
6641	    xdelay = 0;
6642	    operands[4] = GEN_INT (length);
6643	  }
6644	else
6645	  {
6646	    xdelay = 1;
6647	    operands[4] = GEN_INT (length + 4);
6648	  }
6649
6650	/* Create a reversed conditional branch which branches around
6651	   the following insns.  */
6652	if (GET_MODE (operands[1]) != DImode)
6653	  {
6654	    if (nullify)
6655	      {
6656		if (negated)
6657		  strcpy (buf,
6658		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6659		else
6660		  strcpy (buf,
6661		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6662	      }
6663	    else
6664	      {
6665		if (negated)
6666		  strcpy (buf,
6667		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6668		else
6669		  strcpy (buf,
6670		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6671	      }
6672	  }
6673	else
6674	  {
6675	    if (nullify)
6676	      {
6677		if (negated)
6678		  strcpy (buf,
6679		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6680		else
6681		  strcpy (buf,
6682		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6683	      }
6684	    else
6685	      {
6686		if (negated)
6687		  strcpy (buf,
6688		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6689		else
6690		  strcpy (buf,
6691		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6692	      }
6693	  }
6694
6695	output_asm_insn (buf, operands);
6696	return pa_output_lbranch (operands[0], insn, xdelay);
6697    }
6698  return buf;
6699}
6700
6701/* This routine handles output of long unconditional branches that
6702   exceed the maximum range of a simple branch instruction.  Since
6703   we don't have a register available for the branch, we save register
6704   %r1 in the frame marker, load the branch destination DEST into %r1,
6705   execute the branch, and restore %r1 in the delay slot of the branch.
6706
6707   Since long branches may have an insn in the delay slot and the
6708   delay slot is used to restore %r1, we in general need to extract
6709   this insn and execute it before the branch.  However, to facilitate
6710   use of this function by conditional branches, we also provide an
6711   option to not extract the delay insn so that it will be emitted
6712   after the long branch.  So, if there is an insn in the delay slot,
6713   it is extracted if XDELAY is nonzero.
6714
6715   The lengths of the various long-branch sequences are 20, 16 and 24
6716   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
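/* For example, the 16-byte non-PIC sequence when the function has no
   frame of its own is:

	stw %r1,-20(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -20(%r30),%r1	; delay slot restores %r1  */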
6717
6718const char *
6719pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6720{
6721  rtx xoperands[2];
6722
6723  xoperands[0] = dest;
6724
6725  /* First, free up the delay slot.  */
6726  if (xdelay && dbr_sequence_length () != 0)
6727    {
6728      /* We can't handle a jump in the delay slot.  */
6729      gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6730
6731      final_scan_insn (NEXT_INSN (insn), asm_out_file,
6732		       optimize, 0, NULL);
6733
6734      /* Now delete the delay insn.  */
6735      SET_INSN_DELETED (NEXT_INSN (insn));
6736    }
6737
6738  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
6740     be clobbered by the callee.  It isn't copied by HP's builtin
6741     alloca, so this suggests that it can be clobbered if necessary.
6742     The "Static Link" location is copied by HP builtin alloca, so
6743     we avoid using it.  Using the cleanup slot might be a problem
6744     if we have to interoperate with languages that pass cleanup
6745     information.  However, it should be possible to handle these
6746     situations with GCC's asm feature.
6747
6748     The "Current RP" slot is reserved for the called procedure, so
6749     we try to use it when we don't have a frame of our own.  It's
6750     rather unlikely that we won't have a frame when we need to emit
6751     a very long branch.
6752
6753     Really the way to go long term is a register scavenger; goto
6754     the target of the jump and find a register which we can use
6755     as a scratch to hold the value in %r1.  Then, we wouldn't have
6756     to free up the delay slot or clobber a slot that may be needed
6757     for other purposes.  */
6758  if (TARGET_64BIT)
6759    {
6760      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6761	/* Use the return pointer slot in the frame marker.  */
6762	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6763      else
6764	/* Use the slot at -40 in the frame marker since HP builtin
6765	   alloca doesn't copy it.  */
6766	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6767    }
6768  else
6769    {
6770      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6771	/* Use the return pointer slot in the frame marker.  */
6772	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6773      else
6774	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6775	   the only other use of this location is for copying a
6776	   floating point double argument from a floating-point
6777	   register to two general registers.  The copy is done
6778	   as an "atomic" operation when outputting a call, so it
6779	   won't interfere with our using the location here.  */
6780	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6781    }
6782
6783  if (TARGET_PORTABLE_RUNTIME)
6784    {
6785      output_asm_insn ("ldil L'%0,%%r1", xoperands);
6786      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6787      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6788    }
6789  else if (flag_pic)
6790    {
6791      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6792      if (TARGET_SOM || !TARGET_GAS)
6793	{
6794	  xoperands[1] = gen_label_rtx ();
6795	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6796	  targetm.asm_out.internal_label (asm_out_file, "L",
6797					  CODE_LABEL_NUMBER (xoperands[1]));
6798	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6799	}
6800      else
6801	{
6802	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6803	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6804	}
6805      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6806    }
6807  else
6808    /* Now output a very long branch to the original target.  */
6809    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6810
6811  /* Now restore the value of %r1 in the delay slot.  */
6812  if (TARGET_64BIT)
6813    {
6814      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6815	return "ldd -16(%%r30),%%r1";
6816      else
6817	return "ldd -40(%%r30),%%r1";
6818    }
6819  else
6820    {
6821      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6822	return "ldw -20(%%r30),%%r1";
6823      else
6824	return "ldw -12(%%r30),%%r1";
6825    }
6826}
6827
6828/* This routine handles all the branch-on-bit conditional branch sequences we
6829   might need to generate.  It handles nullification of delay slots,
6830   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */
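
/* As an illustrative example (operand values are hypothetical), a short
   non-negated, non-nullified branch on bit 5 of %r4 with WHICH == 0 is
   emitted roughly as "bb,< %r4,5,L$0012".  */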
6832
6833const char *
6834pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6835{
6836  static char buf[100];
6837  bool useskip;
6838  int nullify = INSN_ANNULLED_BRANCH_P (insn);
6839  int length = get_attr_length (insn);
6840  int xdelay;
6841
  /* A conditional branch to the following instruction (i.e., the delay slot) is
6843     asking for a disaster.  I do not think this can happen as this pattern
6844     is only used when optimizing; jump optimization should eliminate the
6845     jump.  But be prepared just in case.  */
6846
6847  if (branch_to_delay_slot_p (insn))
6848    return "nop";
6849
6850  /* If this is a long branch with its delay slot unfilled, set `nullify'
6851     as it can nullify the delay slot and save a nop.  */
6852  if (length == 8 && dbr_sequence_length () == 0)
6853    nullify = 1;
6854
6855  /* If this is a short forward conditional branch which did not get
6856     its delay slot filled, the delay slot can still be nullified.  */
6857  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6858    nullify = forward_branch_p (insn);
6859
  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : false;
6864
6865  switch (length)
6866    {
6867
6868      /* All short conditional branches except backwards with an unfilled
6869	 delay slot.  */
6870      case 4:
6871	if (useskip)
6872	  strcpy (buf, "{extrs,|extrw,s,}");
6873	else
6874	  strcpy (buf, "bb,");
6875	if (useskip && GET_MODE (operands[0]) == DImode)
6876	  strcpy (buf, "extrd,s,*");
6877	else if (GET_MODE (operands[0]) == DImode)
6878	  strcpy (buf, "bb,*");
6879	if ((which == 0 && negated)
6880	     || (which == 1 && ! negated))
6881	  strcat (buf, ">=");
6882	else
6883	  strcat (buf, "<");
6884	if (useskip)
6885	  strcat (buf, " %0,%1,1,%%r0");
6886	else if (nullify && negated)
6887	  {
6888	    if (branch_needs_nop_p (insn))
6889	      strcat (buf, ",n %0,%1,%3%#");
6890	    else
6891	      strcat (buf, ",n %0,%1,%3");
6892	  }
6893	else if (nullify && ! negated)
6894	  {
6895	    if (branch_needs_nop_p (insn))
6896	      strcat (buf, ",n %0,%1,%2%#");
6897	    else
6898	      strcat (buf, ",n %0,%1,%2");
6899	  }
6900	else if (! nullify && negated)
6901	  strcat (buf, " %0,%1,%3");
6902	else if (! nullify && ! negated)
6903	  strcat (buf, " %0,%1,%2");
6904	break;
6905
6906     /* All long conditionals.  Note a short backward branch with an
6907	unfilled delay slot is treated just like a long backward branch
6908	with an unfilled delay slot.  */
6909      case 8:
6910	/* Handle weird backwards branch with a filled delay slot
6911	   which is nullified.  */
6912	if (dbr_sequence_length () != 0
6913	    && ! forward_branch_p (insn)
6914	    && nullify)
6915	  {
6916	    strcpy (buf, "bb,");
6917	    if (GET_MODE (operands[0]) == DImode)
6918	      strcat (buf, "*");
6919	    if ((which == 0 && negated)
6920		|| (which == 1 && ! negated))
6921	      strcat (buf, "<");
6922	    else
6923	      strcat (buf, ">=");
6924	    if (negated)
6925	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6926	    else
6927	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6928	  }
6929	/* Handle short backwards branch with an unfilled delay slot.
6930	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6931	   taken and untaken branches.  */
6932	else if (dbr_sequence_length () == 0
6933		 && ! forward_branch_p (insn)
6934		 && INSN_ADDRESSES_SET_P ()
6935		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6936				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6937	  {
6938	    strcpy (buf, "bb,");
6939	    if (GET_MODE (operands[0]) == DImode)
6940	      strcat (buf, "*");
6941	    if ((which == 0 && negated)
6942		|| (which == 1 && ! negated))
6943	      strcat (buf, ">=");
6944	    else
6945	      strcat (buf, "<");
6946	    if (negated)
6947	      strcat (buf, " %0,%1,%3%#");
6948	    else
6949	      strcat (buf, " %0,%1,%2%#");
6950	  }
6951	else
6952	  {
6953	    if (GET_MODE (operands[0]) == DImode)
6954	      strcpy (buf, "extrd,s,*");
6955	    else
6956	      strcpy (buf, "{extrs,|extrw,s,}");
6957	    if ((which == 0 && negated)
6958		|| (which == 1 && ! negated))
6959	      strcat (buf, "<");
6960	    else
6961	      strcat (buf, ">=");
6962	    if (nullify && negated)
6963	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6964	    else if (nullify && ! negated)
6965	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6966	    else if (negated)
6967	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6968	    else
6969	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6970	  }
6971	break;
6972
6973      default:
6974	/* The reversed conditional branch must branch over one additional
6975	   instruction if the delay slot is filled and needs to be extracted
6976	   by pa_output_lbranch.  If the delay slot is empty or this is a
6977	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
6979	if (dbr_sequence_length () == 0
6980	    || (nullify && forward_branch_p (insn)))
6981	  {
6982	    nullify = 1;
6983	    xdelay = 0;
6984	    operands[4] = GEN_INT (length);
6985	  }
6986	else
6987	  {
6988	    xdelay = 1;
6989	    operands[4] = GEN_INT (length + 4);
6990	  }
6991
6992	if (GET_MODE (operands[0]) == DImode)
6993	  strcpy (buf, "bb,*");
6994	else
6995	  strcpy (buf, "bb,");
6996	if ((which == 0 && negated)
6997	    || (which == 1 && !negated))
6998	  strcat (buf, "<");
6999	else
7000	  strcat (buf, ">=");
7001	if (nullify)
7002	  strcat (buf, ",n %0,%1,.+%4");
7003	else
7004	  strcat (buf, " %0,%1,.+%4");
7005	output_asm_insn (buf, operands);
7006	return pa_output_lbranch (negated ? operands[3] : operands[2],
7007				  insn, xdelay);
7008    }
7009  return buf;
7010}
7011
7012/* This routine handles all the branch-on-variable-bit conditional branch
7013   sequences we might need to generate.  It handles nullification of delay
7014   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
7016   branch.  */
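
/* As an illustrative example (operand values are hypothetical), a short
   non-negated, non-nullified variable-bit branch on %r4 is emitted
   roughly as "bvb,< %r4,L$0012", or "bb,< %r4,%sar,L$0012" under the
   PA 2.0 mnemonics.  */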
7017
7018const char *
7019pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7020	       int which)
7021{
7022  static char buf[100];
7023  bool useskip;
7024  int nullify = INSN_ANNULLED_BRANCH_P (insn);
7025  int length = get_attr_length (insn);
7026  int xdelay;
7027
  /* A conditional branch to the following instruction (i.e., the delay slot) is
7029     asking for a disaster.  I do not think this can happen as this pattern
7030     is only used when optimizing; jump optimization should eliminate the
7031     jump.  But be prepared just in case.  */
7032
7033  if (branch_to_delay_slot_p (insn))
7034    return "nop";
7035
7036  /* If this is a long branch with its delay slot unfilled, set `nullify'
7037     as it can nullify the delay slot and save a nop.  */
7038  if (length == 8 && dbr_sequence_length () == 0)
7039    nullify = 1;
7040
7041  /* If this is a short forward conditional branch which did not get
7042     its delay slot filled, the delay slot can still be nullified.  */
7043  if (! nullify && length == 4 && dbr_sequence_length () == 0)
7044    nullify = forward_branch_p (insn);
7045
  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : false;
7050
7051  switch (length)
7052    {
7053
7054      /* All short conditional branches except backwards with an unfilled
7055	 delay slot.  */
7056      case 4:
7057	if (useskip)
7058	  strcpy (buf, "{vextrs,|extrw,s,}");
7059	else
7060	  strcpy (buf, "{bvb,|bb,}");
7061	if (useskip && GET_MODE (operands[0]) == DImode)
7062	  strcpy (buf, "extrd,s,*");
7063	else if (GET_MODE (operands[0]) == DImode)
7064	  strcpy (buf, "bb,*");
7065	if ((which == 0 && negated)
7066	     || (which == 1 && ! negated))
7067	  strcat (buf, ">=");
7068	else
7069	  strcat (buf, "<");
7070	if (useskip)
7071	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7072	else if (nullify && negated)
7073	  {
7074	    if (branch_needs_nop_p (insn))
7075	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7076	    else
7077	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7078	  }
7079	else if (nullify && ! negated)
7080	  {
7081	    if (branch_needs_nop_p (insn))
7082	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7083	    else
7084	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7085	  }
7086	else if (! nullify && negated)
7087	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7088	else if (! nullify && ! negated)
7089	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7090	break;
7091
7092     /* All long conditionals.  Note a short backward branch with an
7093	unfilled delay slot is treated just like a long backward branch
7094	with an unfilled delay slot.  */
7095      case 8:
7096	/* Handle weird backwards branch with a filled delay slot
7097	   which is nullified.  */
7098	if (dbr_sequence_length () != 0
7099	    && ! forward_branch_p (insn)
7100	    && nullify)
7101	  {
7102	    strcpy (buf, "{bvb,|bb,}");
7103	    if (GET_MODE (operands[0]) == DImode)
7104	      strcat (buf, "*");
7105	    if ((which == 0 && negated)
7106		|| (which == 1 && ! negated))
7107	      strcat (buf, "<");
7108	    else
7109	      strcat (buf, ">=");
7110	    if (negated)
7111	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7112	    else
7113	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7114	  }
7115	/* Handle short backwards branch with an unfilled delay slot.
7116	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7117	   taken and untaken branches.  */
7118	else if (dbr_sequence_length () == 0
7119		 && ! forward_branch_p (insn)
7120		 && INSN_ADDRESSES_SET_P ()
7121		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7122				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7123	  {
7124	    strcpy (buf, "{bvb,|bb,}");
7125	    if (GET_MODE (operands[0]) == DImode)
7126	      strcat (buf, "*");
7127	    if ((which == 0 && negated)
7128		|| (which == 1 && ! negated))
7129	      strcat (buf, ">=");
7130	    else
7131	      strcat (buf, "<");
7132	    if (negated)
7133	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7134	    else
7135	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7136	  }
7137	else
7138	  {
7139	    strcpy (buf, "{vextrs,|extrw,s,}");
7140	    if (GET_MODE (operands[0]) == DImode)
7141	      strcpy (buf, "extrd,s,*");
7142	    if ((which == 0 && negated)
7143		|| (which == 1 && ! negated))
7144	      strcat (buf, "<");
7145	    else
7146	      strcat (buf, ">=");
7147	    if (nullify && negated)
7148	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7149	    else if (nullify && ! negated)
7150	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7151	    else if (negated)
7152	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7153	    else
7154	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7155	  }
7156	break;
7157
7158      default:
7159	/* The reversed conditional branch must branch over one additional
7160	   instruction if the delay slot is filled and needs to be extracted
7161	   by pa_output_lbranch.  If the delay slot is empty or this is a
7162	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
7164	if (dbr_sequence_length () == 0
7165	    || (nullify && forward_branch_p (insn)))
7166	  {
7167	    nullify = 1;
7168	    xdelay = 0;
7169	    operands[4] = GEN_INT (length);
7170	  }
7171	else
7172	  {
7173	    xdelay = 1;
7174	    operands[4] = GEN_INT (length + 4);
7175	  }
7176
7177	if (GET_MODE (operands[0]) == DImode)
7178	  strcpy (buf, "bb,*");
7179	else
7180	  strcpy (buf, "{bvb,|bb,}");
7181	if ((which == 0 && negated)
7182	    || (which == 1 && !negated))
7183	  strcat (buf, "<");
7184	else
7185	  strcat (buf, ">=");
7186	if (nullify)
7187	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7188	else
7189	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7190	output_asm_insn (buf, operands);
7191	return pa_output_lbranch (negated ? operands[3] : operands[2],
7192				  insn, xdelay);
7193    }
7194  return buf;
7195}
7196
7197/* Return the output template for emitting a dbra type insn.
7198
7199   Note it may perform some output operations on its own before
7200   returning the final output string.  */
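
/* As an illustrative example (operand values are hypothetical), the
   common short form below is emitted roughly as "addib,> -1,%r3,L$loop":
   add -1 to %r3 and branch to L$loop if the result is greater than
   zero.  */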
7201const char *
7202pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7203{
7204  int length = get_attr_length (insn);
7205
  /* A conditional branch to the following instruction (i.e., the delay slot) is
7207     asking for a disaster.  Be prepared!  */
7208
7209  if (branch_to_delay_slot_p (insn))
7210    {
7211      if (which_alternative == 0)
7212	return "ldo %1(%0),%0";
7213      else if (which_alternative == 1)
7214	{
7215	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7216	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7217	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7218	  return "{fldws|fldw} -16(%%r30),%0";
7219	}
7220      else
7221	{
7222	  output_asm_insn ("ldw %0,%4", operands);
7223	  return "ldo %1(%4),%4\n\tstw %4,%0";
7224	}
7225    }
7226
7227  if (which_alternative == 0)
7228    {
7229      int nullify = INSN_ANNULLED_BRANCH_P (insn);
7230      int xdelay;
7231
7232      /* If this is a long branch with its delay slot unfilled, set `nullify'
7233	 as it can nullify the delay slot and save a nop.  */
7234      if (length == 8 && dbr_sequence_length () == 0)
7235	nullify = 1;
7236
7237      /* If this is a short forward conditional branch which did not get
7238	 its delay slot filled, the delay slot can still be nullified.  */
7239      if (! nullify && length == 4 && dbr_sequence_length () == 0)
7240	nullify = forward_branch_p (insn);
7241
7242      switch (length)
7243	{
7244	case 4:
7245	  if (nullify)
7246	    {
7247	      if (branch_needs_nop_p (insn))
7248		return "addib,%C2,n %1,%0,%3%#";
7249	      else
7250		return "addib,%C2,n %1,%0,%3";
7251	    }
7252	  else
7253	    return "addib,%C2 %1,%0,%3";
7254
7255	case 8:
	  /* Handle weird backwards branch with a filled delay slot
7257	     which is nullified.  */
7258	  if (dbr_sequence_length () != 0
7259	      && ! forward_branch_p (insn)
7260	      && nullify)
7261	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7262	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7264	     taken and untaken branches.  */
7265	  else if (dbr_sequence_length () == 0
7266		   && ! forward_branch_p (insn)
7267		   && INSN_ADDRESSES_SET_P ()
7268		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7269				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7270	      return "addib,%C2 %1,%0,%3%#";
7271
7272	  /* Handle normal cases.  */
7273	  if (nullify)
7274	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7275	  else
7276	    return "addi,%N2 %1,%0,%0\n\tb %3";
7277
7278	default:
7279	  /* The reversed conditional branch must branch over one additional
7280	     instruction if the delay slot is filled and needs to be extracted
7281	     by pa_output_lbranch.  If the delay slot is empty or this is a
7282	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
7284	  if (dbr_sequence_length () == 0
7285	      || (nullify && forward_branch_p (insn)))
7286	    {
7287	      nullify = 1;
7288	      xdelay = 0;
7289	      operands[4] = GEN_INT (length);
7290	    }
7291	  else
7292	    {
7293	      xdelay = 1;
7294	      operands[4] = GEN_INT (length + 4);
7295	    }
7296
7297	  if (nullify)
7298	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7299	  else
7300	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7301
7302	  return pa_output_lbranch (operands[3], insn, xdelay);
7303	}
7304
7305    }
7306  /* Deal with gross reload from FP register case.  */
7307  else if (which_alternative == 1)
7308    {
7309      /* Move loop counter from FP register to MEM then into a GR,
7310	 increment the GR, store the GR into MEM, and finally reload
7311	 the FP register from MEM from within the branch's delay slot.  */
7312      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7313		       operands);
7314      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7315      if (length == 24)
7316	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7317      else if (length == 28)
7318	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7319      else
7320	{
7321	  operands[5] = GEN_INT (length - 16);
7322	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7323	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7324	  return pa_output_lbranch (operands[3], insn, 0);
7325	}
7326    }
7327  /* Deal with gross reload from memory case.  */
7328  else
7329    {
7330      /* Reload loop counter from memory, the store back to memory
7331	 happens in the branch's delay slot.  */
7332      output_asm_insn ("ldw %0,%4", operands);
7333      if (length == 12)
7334	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7335      else if (length == 16)
7336	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7337      else
7338	{
7339	  operands[5] = GEN_INT (length - 4);
7340	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7341	  return pa_output_lbranch (operands[3], insn, 0);
7342	}
7343    }
7344}
7345
7346/* Return the output template for emitting a movb type insn.
7347
7348   Note it may perform some output operations on its own before
7349   returning the final output string.  */
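
/* As an illustrative example (operand values are hypothetical), the
   common short form below is emitted roughly as "movb,= %r5,%r3,L$0040":
   copy %r5 into %r3 and branch to L$0040 if the value moved is zero.  */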
7350const char *
7351pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7352	     int reverse_comparison)
7353{
7354  int length = get_attr_length (insn);
7355
  /* A conditional branch to the following instruction (i.e., the delay slot) is
7357     asking for a disaster.  Be prepared!  */
7358
7359  if (branch_to_delay_slot_p (insn))
7360    {
7361      if (which_alternative == 0)
7362	return "copy %1,%0";
7363      else if (which_alternative == 1)
7364	{
7365	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7366	  return "{fldws|fldw} -16(%%r30),%0";
7367	}
7368      else if (which_alternative == 2)
7369	return "stw %1,%0";
7370      else
7371	return "mtsar %r1";
7372    }
7373
7374  /* Support the second variant.  */
7375  if (reverse_comparison)
7376    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7377
7378  if (which_alternative == 0)
7379    {
7380      int nullify = INSN_ANNULLED_BRANCH_P (insn);
7381      int xdelay;
7382
7383      /* If this is a long branch with its delay slot unfilled, set `nullify'
7384	 as it can nullify the delay slot and save a nop.  */
7385      if (length == 8 && dbr_sequence_length () == 0)
7386	nullify = 1;
7387
7388      /* If this is a short forward conditional branch which did not get
7389	 its delay slot filled, the delay slot can still be nullified.  */
7390      if (! nullify && length == 4 && dbr_sequence_length () == 0)
7391	nullify = forward_branch_p (insn);
7392
7393      switch (length)
7394	{
7395	case 4:
7396	  if (nullify)
7397	    {
7398	      if (branch_needs_nop_p (insn))
7399		return "movb,%C2,n %1,%0,%3%#";
7400	      else
7401		return "movb,%C2,n %1,%0,%3";
7402	    }
7403	  else
7404	    return "movb,%C2 %1,%0,%3";
7405
7406	case 8:
7407	  /* Handle weird backwards branch with a filled delay slot
7408	     which is nullified.  */
7409	  if (dbr_sequence_length () != 0
7410	      && ! forward_branch_p (insn)
7411	      && nullify)
7412	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7413
7414	  /* Handle short backwards branch with an unfilled delay slot.
7415	     Using a movb;nop rather than or;bl saves 1 cycle for both
7416	     taken and untaken branches.  */
7417	  else if (dbr_sequence_length () == 0
7418		   && ! forward_branch_p (insn)
7419		   && INSN_ADDRESSES_SET_P ()
7420		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7421				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7422	    return "movb,%C2 %1,%0,%3%#";
7423	  /* Handle normal cases.  */
7424	  if (nullify)
7425	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7426	  else
7427	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7428
7429	default:
7430	  /* The reversed conditional branch must branch over one additional
7431	     instruction if the delay slot is filled and needs to be extracted
7432	     by pa_output_lbranch.  If the delay slot is empty or this is a
7433	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
7435	  if (dbr_sequence_length () == 0
7436	      || (nullify && forward_branch_p (insn)))
7437	    {
7438	      nullify = 1;
7439	      xdelay = 0;
7440	      operands[4] = GEN_INT (length);
7441	    }
7442	  else
7443	    {
7444	      xdelay = 1;
7445	      operands[4] = GEN_INT (length + 4);
7446	    }
7447
7448	  if (nullify)
7449	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7450	  else
7451	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7452
7453	  return pa_output_lbranch (operands[3], insn, xdelay);
7454	}
7455    }
7456  /* Deal with gross reload for FP destination register case.  */
7457  else if (which_alternative == 1)
7458    {
7459      /* Move source register to MEM, perform the branch test, then
7460	 finally load the FP register from MEM from within the branch's
7461	 delay slot.  */
7462      output_asm_insn ("stw %1,-16(%%r30)", operands);
7463      if (length == 12)
7464	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7465      else if (length == 16)
7466	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7467      else
7468	{
7469	  operands[4] = GEN_INT (length - 4);
7470	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7471	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7472	  return pa_output_lbranch (operands[3], insn, 0);
7473	}
7474    }
7475  /* Deal with gross reload from memory case.  */
7476  else if (which_alternative == 2)
7477    {
7478      /* Reload loop counter from memory, the store back to memory
7479	 happens in the branch's delay slot.  */
7480      if (length == 8)
7481	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7482      else if (length == 12)
7483	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7484      else
7485	{
7486	  operands[4] = GEN_INT (length);
7487	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7488			   operands);
7489	  return pa_output_lbranch (operands[3], insn, 0);
7490	}
7491    }
7492  /* Handle SAR as a destination.  */
7493  else
7494    {
7495      if (length == 8)
7496	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7497      else if (length == 12)
7498	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7499      else
7500	{
7501	  operands[4] = GEN_INT (length);
7502	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7503			   operands);
7504	  return pa_output_lbranch (operands[3], insn, 0);
7505	}
7506    }
7507}
7508
7509/* Copy any FP arguments in INSN into integer registers.  */
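
/* For illustration (register assignments are hypothetical), an SFmode
   argument in %fr4 is bounced through the stack roughly as:

	fstws %fr4,-16(%sr0,%r30)
	ldw -16(%sr0,%r30),%r26

   A DFmode argument takes one store and two word loads since it lands
   in a pair of general registers.  */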
7510static void
7511copy_fp_args (rtx_insn *insn)
7512{
7513  rtx link;
7514  rtx xoperands[2];
7515
7516  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7517    {
7518      int arg_mode, regno;
7519      rtx use = XEXP (link, 0);
7520
7521      if (! (GET_CODE (use) == USE
7522	  && GET_CODE (XEXP (use, 0)) == REG
7523	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7524	continue;
7525
7526      arg_mode = GET_MODE (XEXP (use, 0));
7527      regno = REGNO (XEXP (use, 0));
7528
7529      /* Is it a floating point register?  */
7530      if (regno >= 32 && regno <= 39)
7531	{
7532	  /* Copy the FP register into an integer register via memory.  */
7533	  if (arg_mode == SFmode)
7534	    {
7535	      xoperands[0] = XEXP (use, 0);
7536	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7537	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7538	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7539	    }
7540	  else
7541	    {
7542	      xoperands[0] = XEXP (use, 0);
7543	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7544	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7545	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7546	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7547	    }
7548	}
7549    }
7550}
7551
7552/* Compute length of the FP argument copy sequence for INSN.  */
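
/* Each SFmode copy in copy_fp_args is two 4-byte instructions (8 bytes)
   and each DFmode copy is three (12 bytes), which is where the constants
   below come from.  */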
7553static int
7554length_fp_args (rtx_insn *insn)
7555{
7556  int length = 0;
7557  rtx link;
7558
7559  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7560    {
7561      int arg_mode, regno;
7562      rtx use = XEXP (link, 0);
7563
7564      if (! (GET_CODE (use) == USE
7565	  && GET_CODE (XEXP (use, 0)) == REG
7566	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7567	continue;
7568
7569      arg_mode = GET_MODE (XEXP (use, 0));
7570      regno = REGNO (XEXP (use, 0));
7571
7572      /* Is it a floating point register?  */
7573      if (regno >= 32 && regno <= 39)
7574	{
7575	  if (arg_mode == SFmode)
7576	    length += 8;
7577	  else
7578	    length += 12;
7579	}
7580    }
7581
7582  return length;
7583}
7584
7585/* Return the attribute length for the millicode call instruction INSN.
7586   The length must match the code generated by pa_output_millicode_call.
7587   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */
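
/* For example, the 32-bit non-PIC long millicode call below is 12 bytes:
   ldil, ble and the delay slot instruction.  */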
7589
7590int
7591pa_attr_length_millicode_call (rtx_insn *insn)
7592{
7593  unsigned long distance = -1;
7594  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7595
7596  if (INSN_ADDRESSES_SET_P ())
7597    {
7598      distance = (total + insn_current_reference_address (insn));
7599      if (distance < total)
7600	distance = -1;
7601    }
7602
7603  if (TARGET_64BIT)
7604    {
7605      if (!TARGET_LONG_CALLS && distance < 7600000)
7606	return 8;
7607
7608      return 20;
7609    }
7610  else if (TARGET_PORTABLE_RUNTIME)
7611    return 24;
7612  else
7613    {
7614      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7615	return 8;
7616
7617      if (!flag_pic)
7618	return 12;
7619
7620      return 24;
7621    }
7622}
7623
7624/* INSN is a function call.
7625
7626   CALL_DEST is the routine we are calling.  */
7627
7628const char *
7629pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7630{
7631  int attr_length = get_attr_length (insn);
7632  int seq_length = dbr_sequence_length ();
7633  rtx xoperands[3];
7634
7635  xoperands[0] = call_dest;
7636  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7637
7638  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within-reach
7640     form of the $$sh_func_adrs call has a length of 28.  Because it
7641     has an attribute type of sh_func_adrs, it never has a nonzero
7642     sequence length (i.e., the delay slot is never filled).  */
7643  if (!TARGET_LONG_CALLS
7644      && (attr_length == 8
7645	  || (attr_length == 28
7646	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7647    {
7648      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7649    }
7650  else
7651    {
7652      if (TARGET_64BIT)
7653	{
7654	  /* It might seem that one insn could be saved by accessing
7655	     the millicode function using the linkage table.  However,
7656	     this doesn't work in shared libraries and other dynamically
7657	     loaded objects.  Using a pc-relative sequence also avoids
7658	     problems related to the implicit use of the gp register.  */
7659	  output_asm_insn ("b,l .+8,%%r1", xoperands);
7660
7661	  if (TARGET_GAS)
7662	    {
7663	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7664	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7665	    }
7666	  else
7667	    {
7668	      xoperands[1] = gen_label_rtx ();
7669	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7670	      targetm.asm_out.internal_label (asm_out_file, "L",
7671					 CODE_LABEL_NUMBER (xoperands[1]));
7672	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7673	    }
7674
7675	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7676	}
7677      else if (TARGET_PORTABLE_RUNTIME)
7678	{
7679	  /* Pure portable runtime doesn't allow be/ble; we also don't
7680	     have PIC support in the assembler/linker, so this sequence
7681	     is needed.  */
7682
7683	  /* Get the address of our target into %r1.  */
7684	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7685	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7686
7687	  /* Get our return address into %r31.  */
7688	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7689	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7690
7691	  /* Jump to our target address in %r1.  */
7692	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7693	}
7694      else if (!flag_pic)
7695	{
7696	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7697	  if (TARGET_PA_20)
7698	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7699	  else
7700	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7701	}
7702      else
7703	{
7704	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7705	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7706
7707	  if (TARGET_SOM || !TARGET_GAS)
7708	    {
7709	      /* The HP assembler can generate relocations for the
7710		 difference of two symbols.  GAS can do this for a
7711		 millicode symbol but not an arbitrary external
7712		 symbol when generating SOM output.  */
7713	      xoperands[1] = gen_label_rtx ();
7714	      targetm.asm_out.internal_label (asm_out_file, "L",
7715					 CODE_LABEL_NUMBER (xoperands[1]));
7716	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7717	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7718	    }
7719	  else
7720	    {
7721	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7722	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7723			       xoperands);
7724	    }
7725
7726	  /* Jump to our target address in %r1.  */
7727	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7728	}
7729    }
7730
7731  if (seq_length == 0)
7732    output_asm_insn ("nop", xoperands);
7733
7734  return "";
7735}
7736
7737/* Return the attribute length of the call instruction INSN.  The SIBCALL
7738   flag indicates whether INSN is a regular call or a sibling call.  The
7739   length returned must be longer than the code actually generated by
7740   pa_output_call.  Since branch shortening is done before delay branch
7741   sequencing, there is no way to determine whether or not the delay
7742   slot will be filled during branch shortening.  Even when the delay
7743   slot is filled, we may have to add a nop if the delay slot contains
7744   a branch that can't reach its target.  Thus, we always have to include
7745   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length, but we do it here now as some sequences always
7747   fill the delay slot and we can save four bytes in the estimate for
7748   these sequences.  */
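
/* For example, the short pc-relative case below is 8 bytes: the branch
   itself plus its (possibly unfilled) delay slot.  */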
7749
7750int
7751pa_attr_length_call (rtx_insn *insn, int sibcall)
7752{
7753  int local_call;
7754  rtx call, call_dest;
7755  tree call_decl;
7756  int length = 0;
7757  rtx pat = PATTERN (insn);
7758  unsigned long distance = -1;
7759
7760  gcc_assert (CALL_P (insn));
7761
7762  if (INSN_ADDRESSES_SET_P ())
7763    {
7764      unsigned long total;
7765
7766      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7767      distance = (total + insn_current_reference_address (insn));
7768      if (distance < total)
7769	distance = -1;
7770    }
7771
7772  gcc_assert (GET_CODE (pat) == PARALLEL);
7773
7774  /* Get the call rtx.  */
7775  call = XVECEXP (pat, 0, 0);
7776  if (GET_CODE (call) == SET)
7777    call = SET_SRC (call);
7778
7779  gcc_assert (GET_CODE (call) == CALL);
7780
7781  /* Determine if this is a local call.  */
7782  call_dest = XEXP (XEXP (call, 0), 0);
7783  call_decl = SYMBOL_REF_DECL (call_dest);
7784  local_call = call_decl && targetm.binds_local_p (call_decl);
7785
7786  /* pc-relative branch.  */
7787  if (!TARGET_LONG_CALLS
7788      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7789	  || distance < MAX_PCREL17F_OFFSET))
7790    length += 8;
7791
7792  /* 64-bit plabel sequence.  */
7793  else if (TARGET_64BIT && !local_call)
7794    length += sibcall ? 28 : 24;
7795
7796  /* non-pic long absolute branch sequence.  */
7797  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7798    length += 12;
7799
7800  /* long pc-relative branch sequence.  */
7801  else if (TARGET_LONG_PIC_SDIFF_CALL
7802	   || (TARGET_GAS && !TARGET_SOM
7803	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7804    {
7805      length += 20;
7806
7807      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7808	length += 8;
7809    }
7810
7811  /* 32-bit plabel sequence.  */
7812  else
7813    {
7814      length += 32;
7815
7816      if (TARGET_SOM)
7817	length += length_fp_args (insn);
7818
7819      if (flag_pic)
7820	length += 4;
7821
7822      if (!TARGET_PA_20)
7823	{
7824	  if (!sibcall)
7825	    length += 8;
7826
7827	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7828	    length += 8;
7829	}
7830    }
7831
7832  return length;
7833}
7834
7835/* INSN is a function call.
7836
7837   CALL_DEST is the routine we are calling.  */
7838
7839const char *
7840pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7841{
7842  int seq_length = dbr_sequence_length ();
7843  tree call_decl = SYMBOL_REF_DECL (call_dest);
7844  int local_call = call_decl && targetm.binds_local_p (call_decl);
7845  rtx xoperands[2];
7846
7847  xoperands[0] = call_dest;
7848
7849  /* Handle the common case where we're sure that the branch will reach
7850     the beginning of the "$CODE$" subspace.  This is the beginning of
7851     the current function if we are in a named section.  */
7852  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7853    {
7854      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7855      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7856    }
7857  else
7858    {
7859      if (TARGET_64BIT && !local_call)
7860	{
7861	  /* ??? As far as I can tell, the HP linker doesn't support the
7862	     long pc-relative sequence described in the 64-bit runtime
7863	     architecture.  So, we use a slightly longer indirect call.  */
7864	  xoperands[0] = pa_get_deferred_plabel (call_dest);
7865	  xoperands[1] = gen_label_rtx ();
7866
7867	  /* If this isn't a sibcall, we put the load of %r27 into the
7868	     delay slot.  We can't do this in a sibcall as we don't
7869	     have a second call-clobbered scratch register available.
7870	     We don't need to do anything when generating fast indirect
7871	     calls.  */
7872	  if (seq_length != 0 && !sibcall)
7873	    {
7874	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7875			       optimize, 0, NULL);
7876
7877	      /* Now delete the delay insn.  */
7878	      SET_INSN_DELETED (NEXT_INSN (insn));
7879	      seq_length = 0;
7880	    }
7881
7882	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7883	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7884	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7885
7886	  if (sibcall)
7887	    {
7888	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7889	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7890	      output_asm_insn ("bve (%%r1)", xoperands);
7891	    }
7892	  else
7893	    {
7894	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7895	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7896	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7897	      seq_length = 1;
7898	    }
7899	}
7900      else
7901	{
7902	  int indirect_call = 0;
7903
7904	  /* Emit a long call.  There are several different sequences
7905	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
7907	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7908	      && !TARGET_LONG_PIC_SDIFF_CALL
7909	      && !(TARGET_GAS && !TARGET_SOM
7910		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7911	      && !TARGET_64BIT)
7912	    indirect_call = 1;
7913
7914	  if (seq_length != 0
7915	      && !sibcall
7916	      && (!TARGET_PA_20
7917		  || indirect_call
7918		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7919	    {
7920	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and, in fact, before
		 argument relocation).  */
7923	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7924			       NULL);
7925
7926	      /* Now delete the delay insn.  */
7927	      SET_INSN_DELETED (NEXT_INSN (insn));
7928	      seq_length = 0;
7929	    }
7930
7931	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7932	    {
7933	      /* This is the best sequence for making long calls in
7934		 non-pic code.  Unfortunately, GNU ld doesn't provide
7935		 the stub needed for external calls, and GAS's support
7936		 for this with the SOM linker is buggy.  It is safe
7937		 to use this for local calls.  */
7938	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7939	      if (sibcall)
7940		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7941	      else
7942		{
7943		  if (TARGET_PA_20)
7944		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7945				     xoperands);
7946		  else
7947		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7948
7949		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7950		  seq_length = 1;
7951		}
7952	    }
7953	  else
7954	    {
7955	      if (TARGET_LONG_PIC_SDIFF_CALL)
7956		{
7957		  /* The HP assembler and linker can handle relocations
7958		     for the difference of two symbols.  The HP assembler
7959		     recognizes the sequence as a pc-relative call and
7960		     the linker provides stubs when needed.  */
7961		  xoperands[1] = gen_label_rtx ();
7962		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7963		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7964		  targetm.asm_out.internal_label (asm_out_file, "L",
7965					     CODE_LABEL_NUMBER (xoperands[1]));
7966		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7967		}
7968	      else if (TARGET_GAS && !TARGET_SOM
7969		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7970		{
		  /* GAS currently can't generate the relocations that
7972		      are needed for the SOM linker under HP-UX using this
7973		      sequence.  The GNU linker doesn't generate the stubs
7974		      that are needed for external calls on TARGET_ELF32
7975		      with this sequence.  For now, we have to use a
7976		      longer plabel sequence when using GAS.  */
7977		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7978		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7979				   xoperands);
7980		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7981				   xoperands);
7982		}
7983	      else
7984		{
7985		  /* Emit a long plabel-based call sequence.  This is
7986		     essentially an inline implementation of $$dyncall.
7987		     We don't actually try to call $$dyncall as this is
7988		     as difficult as calling the function itself.  */
7989		  xoperands[0] = pa_get_deferred_plabel (call_dest);
7990		  xoperands[1] = gen_label_rtx ();
7991
7992		  /* Since the call is indirect, FP arguments in registers
7993		     need to be copied to the general registers.  Then, the
7994		     argument relocation stub will copy them back.  */
7995		  if (TARGET_SOM)
7996		    copy_fp_args (insn);
7997
7998		  if (flag_pic)
7999		    {
8000		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8001		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8002		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8003		    }
8004		  else
8005		    {
8006		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8007				       xoperands);
8008		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8009				       xoperands);
8010		    }
8011
8012		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8013		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8014		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8015		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8016
8017		  if (!sibcall && !TARGET_PA_20)
8018		    {
8019		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8020		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8021			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8022		      else
8023			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8024		    }
8025		}
8026
8027	      if (TARGET_PA_20)
8028		{
8029		  if (sibcall)
8030		    output_asm_insn ("bve (%%r1)", xoperands);
8031		  else
8032		    {
8033		      if (indirect_call)
8034			{
8035			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8036			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8037			  seq_length = 1;
8038			}
8039		      else
8040			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8041		    }
8042		}
8043	      else
8044		{
8045		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8046		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8047				     xoperands);
8048
8049		  if (sibcall)
8050		    {
8051		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8052			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8053		      else
8054			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8055		    }
8056		  else
8057		    {
8058		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8059			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8060		      else
8061			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8062
8063		      if (indirect_call)
8064			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8065		      else
8066			output_asm_insn ("copy %%r31,%%r2", xoperands);
8067		      seq_length = 1;
8068		    }
8069		}
8070	    }
8071	}
8072    }
8073
8074  if (seq_length == 0)
8075    output_asm_insn ("nop", xoperands);
8076
8077  return "";
8078}
8079
8080/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
8082   The returned length includes the delay slot.  Currently, the delay
8083   slot of an indirect call sequence is not exposed and it is used by
8084   the sequence itself.  */
8085
8086int
8087pa_attr_length_indirect_call (rtx_insn *insn)
8088{
8089  unsigned long distance = -1;
8090  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8091
8092  if (INSN_ADDRESSES_SET_P ())
8093    {
8094      distance = (total + insn_current_reference_address (insn));
8095      if (distance < total)
8096	distance = -1;
8097    }
8098
8099  if (TARGET_64BIT)
8100    return 12;
8101
8102  if (TARGET_FAST_INDIRECT_CALLS
8103      || (!TARGET_LONG_CALLS
8104	  && !TARGET_PORTABLE_RUNTIME
8105	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8106	      || distance < MAX_PCREL17F_OFFSET)))
8107    return 8;
8108
8109  if (flag_pic)
8110    return 20;
8111
8112  if (TARGET_PORTABLE_RUNTIME)
8113    return 16;
8114
8115  /* Out of reach, can use ble.  */
8116  return 12;
8117}
8118
8119const char *
8120pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8121{
8122  rtx xoperands[1];
8123
8124  if (TARGET_64BIT)
8125    {
8126      xoperands[0] = call_dest;
8127      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8128      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8129      return "";
8130    }
8131
8132  /* First the special case for kernels, level 0 systems, etc.  */
8133  if (TARGET_FAST_INDIRECT_CALLS)
8134    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8135
8136  /* Now the normal case -- we can reach $$dyncall directly or
8137     we're sure that we can get there via a long-branch stub.
8138
8139     No need to check target flags as the length uniquely identifies
8140     the remaining cases.  */
8141  if (pa_attr_length_indirect_call (insn) == 8)
8142    {
8143      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8144	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8145	 variant of the B,L instruction can't be used on the SOM target.  */
8146      if (TARGET_PA_20 && !TARGET_SOM)
8147	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8148      else
8149	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8150    }
8151
8152  /* Long millicode call, but we are not generating PIC or portable runtime
8153     code.  */
8154  if (pa_attr_length_indirect_call (insn) == 12)
8155    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8156
8157  /* Long millicode call for portable runtime.  */
8158  if (pa_attr_length_indirect_call (insn) == 16)
8159    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8160
8161  /* We need a long PIC call to $$dyncall.  */
8162  xoperands[0] = NULL_RTX;
8163  output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8164  if (TARGET_SOM || !TARGET_GAS)
8165    {
8166      xoperands[0] = gen_label_rtx ();
8167      output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8168      targetm.asm_out.internal_label (asm_out_file, "L",
8169				      CODE_LABEL_NUMBER (xoperands[0]));
8170      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8171    }
8172  else
8173    {
8174      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8175      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8176		       xoperands);
8177    }
8178  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8179  output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8180  return "";
8181}
8182
/* In HP-UX 8.0's shared library scheme, special relocations are needed
8184   for function labels if they might be passed to a function
8185   in a shared library (because shared libraries don't live in code
8186   space), and special magic is needed to construct their address.  */
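
/* For example, the assembler name "foo" is rewritten here as "@foo";
   pa_strip_name_encoding below strips the '@' again when the name is
   output.  */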
8187
8188void
8189pa_encode_label (rtx sym)
8190{
8191  const char *str = XSTR (sym, 0);
8192  int len = strlen (str) + 1;
8193  char *newstr, *p;
8194
8195  p = newstr = XALLOCAVEC (char, len + 1);
8196  *p++ = '@';
8197  strcpy (p, str);
8198
8199  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8200}
8201
8202static void
8203pa_encode_section_info (tree decl, rtx rtl, int first)
8204{
8205  int old_referenced = 0;
8206
8207  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8208    old_referenced
8209      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8210
8211  default_encode_section_info (decl, rtl, first);
8212
8213  if (first && TEXT_SPACE_P (decl))
8214    {
8215      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8216      if (TREE_CODE (decl) == FUNCTION_DECL)
8217	pa_encode_label (XEXP (rtl, 0));
8218    }
8219  else if (old_referenced)
8220    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8221}
8222
/* This is sort of the inverse of pa_encode_section_info.  */
8224
8225static const char *
8226pa_strip_name_encoding (const char *str)
8227{
8228  str += (*str == '@');
8229  str += (*str == '*');
8230  return str;
8231}
8232
8233/* Returns 1 if OP is a function label involved in a simple addition
8234   with a constant.  Used to keep certain patterns from matching
8235   during instruction combination.  */
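/* For example, it returns 1 for RTL of the roughly sketched form
   (const (plus (symbol_ref "@foo") (const_int 4))).  */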
8236int
8237pa_is_function_label_plus_const (rtx op)
8238{
8239  /* Strip off any CONST.  */
8240  if (GET_CODE (op) == CONST)
8241    op = XEXP (op, 0);
8242
8243  return (GET_CODE (op) == PLUS
8244	  && function_label_operand (XEXP (op, 0), VOIDmode)
8245	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8246}
8247
8248/* Output assembly code for a thunk to FUNCTION.  */
8249
8250static void
8251pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8252			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8253			tree function)
8254{
8255  static unsigned int current_thunk_number;
8256  int val_14 = VAL_14_BITS_P (delta);
8257  unsigned int old_last_address = last_address, nbytes = 0;
8258  char label[16];
8259  rtx xoperands[4];
8260
8261  xoperands[0] = XEXP (DECL_RTL (function), 0);
8262  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8263  xoperands[2] = GEN_INT (delta);
8264
8265  final_start_function (emit_barrier (), file, 1);
8266
8267  /* Output the thunk.  We know that the function is in the same
8268     translation unit (i.e., the same space) as the thunk, and that
8269     thunks are output after their method.  Thus, we don't need an
8270     external branch to reach the function.  With SOM and GAS,
8271     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
8273     will add a long branch stub if necessary.
8274
8275     However, we have to be careful when generating PIC code on the
8276     SOM port to ensure that the sequence does not transfer to an
8277     import stub for the target function as this could clobber the
8278     return value saved at SP-24.  This would also apply to the
8279     32-bit linux port if the multi-space model is implemented.  */
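  /* A sketch of the simplest case below, where DELTA fits in 14 bits
     and the target is directly reachable (names are illustrative):

	b function
	ldo delta(%r26),%r26	; adjust the "this" pointer in the delay slot  */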
8280  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8281       && !(flag_pic && TREE_PUBLIC (function))
8282       && (TARGET_GAS || last_address < 262132))
8283      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8284	  && ((targetm_common.have_named_sections
8285	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8286	       /* The GNU 64-bit linker has rather poor stub management.
8287		  So, we use a long branch from thunks that aren't in
8288		  the same section as the target function.  */
8289	       && ((!TARGET_64BIT
8290		    && (DECL_SECTION_NAME (thunk_fndecl)
8291			!= DECL_SECTION_NAME (function)))
8292		   || ((DECL_SECTION_NAME (thunk_fndecl)
8293			== DECL_SECTION_NAME (function))
8294		       && last_address < 262132)))
8295	      /* In this case, we need to be able to reach the start of
8296		 the stub table even though the function is likely closer
8297		 and can be jumped to directly.  */
8298	      || (targetm_common.have_named_sections
8299		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8300		  && DECL_SECTION_NAME (function) == NULL
8301		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8302	      /* Likewise.  */
8303	      || (!targetm_common.have_named_sections
8304		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8305    {
8306      if (!val_14)
8307	output_asm_insn ("addil L'%2,%%r26", xoperands);
8308
8309      output_asm_insn ("b %0", xoperands);
8310
8311      if (val_14)
8312	{
8313	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8314	  nbytes += 8;
8315	}
8316      else
8317	{
8318	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8319	  nbytes += 12;
8320	}
8321    }
8322  else if (TARGET_64BIT)
8323    {
8324      /* We only have one call-clobbered scratch register, so we can't
8325         make use of the delay slot if delta doesn't fit in 14 bits.  */
8326      if (!val_14)
8327	{
8328	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8329	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8330	}
8331
8332      output_asm_insn ("b,l .+8,%%r1", xoperands);
8333
8334      if (TARGET_GAS)
8335	{
8336	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8337	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8338	}
8339      else
8340	{
8341	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8342	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8343	}
8344
8345      if (val_14)
8346	{
8347	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8348	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8349	  nbytes += 20;
8350	}
8351      else
8352	{
8353	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8354	  nbytes += 24;
8355	}
8356    }
8357  else if (TARGET_PORTABLE_RUNTIME)
8358    {
8359      output_asm_insn ("ldil L'%0,%%r1", xoperands);
8360      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8361
8362      if (!val_14)
8363	output_asm_insn ("addil L'%2,%%r26", xoperands);
8364
8365      output_asm_insn ("bv %%r0(%%r22)", xoperands);
8366
8367      if (val_14)
8368	{
8369	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8370	  nbytes += 16;
8371	}
8372      else
8373	{
8374	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8375	  nbytes += 20;
8376	}
8377    }
8378  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8379    {
8380      /* The function is accessible from outside this module.  The only
8381	 way to avoid an import stub between the thunk and function is to
8382	 call the function directly with an indirect sequence similar to
8383	 that used by $$dyncall.  This is possible because $$dyncall acts
8384	 as the import stub in an indirect call.  */
8385      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8386      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8387      output_asm_insn ("addil LT'%3,%%r19", xoperands);
8388      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8389      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8390      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8391      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8392      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8393      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8394
8395      if (!val_14)
8396	{
8397	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8398	  nbytes += 4;
8399	}
8400
8401      if (TARGET_PA_20)
8402	{
8403	  output_asm_insn ("bve (%%r22)", xoperands);
8404	  nbytes += 36;
8405	}
8406      else if (TARGET_NO_SPACE_REGS)
8407	{
8408	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8409	  nbytes += 36;
8410	}
8411      else
8412	{
8413	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8414	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8415	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8416	  nbytes += 44;
8417	}
8418
8419      if (val_14)
8420	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8421      else
8422	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8423    }
8424  else if (flag_pic)
8425    {
8426      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8427
8428      if (TARGET_SOM || !TARGET_GAS)
8429	{
8430	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8431	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8432	}
8433      else
8434	{
8435	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8436	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8437	}
8438
8439      if (!val_14)
8440	output_asm_insn ("addil L'%2,%%r26", xoperands);
8441
8442      output_asm_insn ("bv %%r0(%%r22)", xoperands);
8443
8444      if (val_14)
8445	{
8446	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8447	  nbytes += 20;
8448	}
8449      else
8450	{
8451	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8452	  nbytes += 24;
8453	}
8454    }
8455  else
8456    {
8457      if (!val_14)
8458	output_asm_insn ("addil L'%2,%%r26", xoperands);
8459
8460      output_asm_insn ("ldil L'%0,%%r22", xoperands);
8461      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8462
8463      if (val_14)
8464	{
8465	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8466	  nbytes += 12;
8467	}
8468      else
8469	{
8470	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8471	  nbytes += 16;
8472	}
8473    }
8474
8475  final_end_function ();
8476
8477  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8478    {
8479      switch_to_section (data_section);
8480      output_asm_insn (".align 4", xoperands);
8481      ASM_OUTPUT_LABEL (file, label);
8482      output_asm_insn (".word P'%0", xoperands);
8483    }
8484
8485  current_thunk_number++;
8486  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8487	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8488  last_address += nbytes;
8489  if (old_last_address > last_address)
8490    last_address = UINT_MAX;
8491  update_total_code_bytes (nbytes);
8492}
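
/* Editor's sketch (illustrative only; round_to_boundary is a made-up
   name, not part of the port): the NBYTES computation above is the
   usual power-of-two round-up, e.g. with an 8-byte function boundary
   a 20-byte thunk pads to 24 bytes.  */
#if 0
static unsigned int
round_to_boundary (unsigned int nbytes, unsigned int boundary_bytes)
{
  /* BOUNDARY_BYTES must be a power of two.  */
  return (nbytes + boundary_bytes - 1) & ~(boundary_bytes - 1);
}
#endif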
8493
8494/* Only direct calls to static functions are allowed to be sibling (tail)
8495   call optimized.
8496
   This restriction is necessary because some linker-generated stubs will,
   in some cases, store return pointers into rp', which might clobber a
   live value already in rp'.
8500
8501   In a sibcall the current function and the target function share stack
8502   space.  Thus if the path to the current function and the path to the
8503   target function save a value in rp', they save the value into the
8504   same stack slot, which has undesirable consequences.
8505
8506   Because of the deferred binding nature of shared libraries any function
8507   with external scope could be in a different load module and thus require
8508   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.
8510
8511   Note that GCC never needs return value relocations, so we don't have to
8512   worry about static calls with return value relocations (which require
8513   saving rp').
8514
8515   It is safe to perform a sibcall optimization when the target function
8516   will never return.  */
8517static bool
8518pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8519{
8520  if (TARGET_PORTABLE_RUNTIME)
8521    return false;
8522
8523  /* Sibcalls are not ok because the arg pointer register is not a fixed
8524     register.  This prevents the sibcall optimization from occurring.  In
8525     addition, there are problems with stub placement using GNU ld.  This
8526     is because a normal sibcall branch uses a 17-bit relocation while
8527     a regular call branch uses a 22-bit relocation.  As a result, more
8528     care needs to be taken in the placement of long-branch stubs.  */
8529  if (TARGET_64BIT)
8530    return false;
8531
8532  /* Sibcalls are only ok within a translation unit.  */
8533  return (decl && !TREE_PUBLIC (decl));
8534}
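
/* An illustrative example (editor's sketch; the function names are
   made up): a tail call to a file-local function qualifies, while a
   call to any TREE_PUBLIC function is rejected because it may resolve
   to another load module.  */
#if 0
static int helper (int);	/* Not TREE_PUBLIC: sibcall allowed.  */
extern int exported (int);	/* TREE_PUBLIC: sibcall rejected.  */

int
caller (int x)
{
  return helper (x);		/* May become a sibcall.  */
}
#endif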
8535
8536/* ??? Addition is not commutative on the PA due to the weird implicit
8537   space register selection rules for memory addresses.  Therefore, we
8538   don't consider a + b == b + a, as this might be inside a MEM.  */
8539static bool
8540pa_commutative_p (const_rtx x, int outer_code)
8541{
8542  return (COMMUTATIVE_P (x)
8543	  && (TARGET_NO_SPACE_REGS
8544	      || (outer_code != UNKNOWN && outer_code != MEM)
8545	      || GET_CODE (x) != PLUS));
8546}
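
/* Concretely (editor's example): inside a MEM the base operand of a
   PLUS participates in the implicit space-register selection, so
   (mem (plus (reg %r26) (reg %r25))) and (mem (plus (reg %r25)
   (reg %r26))) may reference different spaces; outside a MEM the
   operands of a PLUS can still be swapped freely.  */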
8547
8548/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8549   use in fmpyadd instructions.  */
8550int
8551pa_fmpyaddoperands (rtx *operands)
8552{
8553  machine_mode mode = GET_MODE (operands[0]);
8554
8555  /* Must be a floating point mode.  */
8556  if (mode != SFmode && mode != DFmode)
8557    return 0;
8558
8559  /* All modes must be the same.  */
8560  if (! (mode == GET_MODE (operands[1])
8561	 && mode == GET_MODE (operands[2])
8562	 && mode == GET_MODE (operands[3])
8563	 && mode == GET_MODE (operands[4])
8564	 && mode == GET_MODE (operands[5])))
8565    return 0;
8566
8567  /* All operands must be registers.  */
8568  if (! (GET_CODE (operands[1]) == REG
8569	 && GET_CODE (operands[2]) == REG
8570	 && GET_CODE (operands[3]) == REG
8571	 && GET_CODE (operands[4]) == REG
8572	 && GET_CODE (operands[5]) == REG))
8573    return 0;
8574
8575  /* Only 2 real operands to the addition.  One of the input operands must
8576     be the same as the output operand.  */
8577  if (! rtx_equal_p (operands[3], operands[4])
8578      && ! rtx_equal_p (operands[3], operands[5]))
8579    return 0;
8580
8581  /* Inout operand of add cannot conflict with any operands from multiply.  */
8582  if (rtx_equal_p (operands[3], operands[0])
8583     || rtx_equal_p (operands[3], operands[1])
8584     || rtx_equal_p (operands[3], operands[2]))
8585    return 0;
8586
8587  /* multiply cannot feed into addition operands.  */
8588  if (rtx_equal_p (operands[4], operands[0])
8589      || rtx_equal_p (operands[5], operands[0]))
8590    return 0;
8591
8592  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8593  if (mode == SFmode
8594      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8595	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8596	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8597	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8598	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8599	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8600    return 0;
8601
8602  /* Passed.  Operands are suitable for fmpyadd.  */
8603  return 1;
8604}
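
/* An operand set that passes the checks above (editor's example,
   DFmode): operands[0..2] = {%fr4, %fr5, %fr6} for the multiply and
   operands[3..5] = {%fr7, %fr7, %fr8} for the add, i.e. fr4 = fr5 * fr6
   in parallel with fr7 = fr7 + fr8.  The add's in/out register fr7 is
   disjoint from every multiply operand, and neither add input is the
   multiply result.  */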
8605
8606#if !defined(USE_COLLECT2)
8607static void
8608pa_asm_out_constructor (rtx symbol, int priority)
8609{
8610  if (!function_label_operand (symbol, VOIDmode))
8611    pa_encode_label (symbol);
8612
8613#ifdef CTORS_SECTION_ASM_OP
8614  default_ctor_section_asm_out_constructor (symbol, priority);
8615#else
8616# ifdef TARGET_ASM_NAMED_SECTION
8617  default_named_section_asm_out_constructor (symbol, priority);
8618# else
8619  default_stabs_asm_out_constructor (symbol, priority);
8620# endif
8621#endif
8622}
8623
8624static void
8625pa_asm_out_destructor (rtx symbol, int priority)
8626{
8627  if (!function_label_operand (symbol, VOIDmode))
8628    pa_encode_label (symbol);
8629
8630#ifdef DTORS_SECTION_ASM_OP
8631  default_dtor_section_asm_out_destructor (symbol, priority);
8632#else
8633# ifdef TARGET_ASM_NAMED_SECTION
8634  default_named_section_asm_out_destructor (symbol, priority);
8635# else
8636  default_stabs_asm_out_destructor (symbol, priority);
8637# endif
8638#endif
8639}
8640#endif
8641
8642/* This function places uninitialized global data in the bss section.
8643   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8644   function on the SOM port to prevent uninitialized global data from
8645   being placed in the data section.  */
8646
8647void
8648pa_asm_output_aligned_bss (FILE *stream,
8649			   const char *name,
8650			   unsigned HOST_WIDE_INT size,
8651			   unsigned int align)
8652{
8653  switch_to_section (bss_section);
8654  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8655
8656#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8657  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8658#endif
8659
8660#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8661  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8662#endif
8663
8664  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8665  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
8667}
8668
8669/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8670   that doesn't allow the alignment of global common storage to be directly
8671   specified.  The SOM linker aligns common storage based on the rounded
8672   value of the NUM_BYTES parameter in the .comm directive.  It's not
8673   possible to use the .align directive as it doesn't affect the alignment
8674   of the label associated with a .comm directive.  */
8675
8676void
8677pa_asm_output_aligned_common (FILE *stream,
8678			      const char *name,
8679			      unsigned HOST_WIDE_INT size,
8680			      unsigned int align)
8681{
8682  unsigned int max_common_align;
8683
8684  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8685  if (align > max_common_align)
8686    {
8687      warning (0, "alignment (%u) for %s exceeds maximum alignment "
8688	       "for global common data.  Using %u",
8689	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8690      align = max_common_align;
8691    }
8692
8693  switch_to_section (bss_section);
8694
8695  assemble_name (stream, name);
  fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
8697           MAX (size, align / BITS_PER_UNIT));
8698}
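
/* Worked example (editor's note, 64-bit target): a 24-byte object with
   16-byte (128-bit) alignment emits ".comm 24", and the linker's
   rounding of NUM_BYTES then provides the requested alignment; a
   1-byte object with the same alignment emits ".comm 16" via the MAX
   above.  */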
8699
8700/* We can't use .comm for local common storage as the SOM linker effectively
8701   treats the symbol as universal and uses the same storage for local symbols
8702   with the same name in different object files.  The .block directive
8703   reserves an uninitialized block of storage.  However, it's not common
8704   storage.  Fortunately, GCC never requests common storage with the same
8705   name in any given translation unit.  */
8706
8707void
8708pa_asm_output_aligned_local (FILE *stream,
8709			     const char *name,
8710			     unsigned HOST_WIDE_INT size,
8711			     unsigned int align)
8712{
8713  switch_to_section (bss_section);
8714  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8715
8716#ifdef LOCAL_ASM_OP
8717  fprintf (stream, "%s", LOCAL_ASM_OP);
8718  assemble_name (stream, name);
8719  fprintf (stream, "\n");
8720#endif
8721
8722  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
8724}
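
/* The emitted sequence looks like this (editor's sketch for an 8-byte
   alignment, 16-byte object; "LCL" stands for whatever LOCAL_ASM_OP
   expands to):

	.align 8
	LCL foo
   foo:
	.block 16  */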
8725
8726/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8727   use in fmpysub instructions.  */
8728int
8729pa_fmpysuboperands (rtx *operands)
8730{
8731  machine_mode mode = GET_MODE (operands[0]);
8732
8733  /* Must be a floating point mode.  */
8734  if (mode != SFmode && mode != DFmode)
8735    return 0;
8736
8737  /* All modes must be the same.  */
8738  if (! (mode == GET_MODE (operands[1])
8739	 && mode == GET_MODE (operands[2])
8740	 && mode == GET_MODE (operands[3])
8741	 && mode == GET_MODE (operands[4])
8742	 && mode == GET_MODE (operands[5])))
8743    return 0;
8744
8745  /* All operands must be registers.  */
8746  if (! (GET_CODE (operands[1]) == REG
8747	 && GET_CODE (operands[2]) == REG
8748	 && GET_CODE (operands[3]) == REG
8749	 && GET_CODE (operands[4]) == REG
8750	 && GET_CODE (operands[5]) == REG))
8751    return 0;
8752
  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
     operation, so operands[4] must be the same as operands[3].  */
8755  if (! rtx_equal_p (operands[3], operands[4]))
8756    return 0;
8757
8758  /* multiply cannot feed into subtraction.  */
8759  if (rtx_equal_p (operands[5], operands[0]))
8760    return 0;
8761
8762  /* Inout operand of sub cannot conflict with any operands from multiply.  */
8763  if (rtx_equal_p (operands[3], operands[0])
8764     || rtx_equal_p (operands[3], operands[1])
8765     || rtx_equal_p (operands[3], operands[2]))
8766    return 0;
8767
8768  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8769  if (mode == SFmode
8770      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8771	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8772	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8773	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8774	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8775	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8776    return 0;
8777
8778  /* Passed.  Operands are suitable for fmpysub.  */
8779  return 1;
8780}
8781
8782/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8783   constants for shadd instructions.  */
8784int
8785pa_shadd_constant_p (int val)
8786{
8787  if (val == 2 || val == 4 || val == 8)
8788    return 1;
8789  else
8790    return 0;
8791}
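
/* Editor's example: the valid scales correspond to the sh1add, sh2add
   and sh3add instructions, so an indexed address such as base + 4*i
   folds into one insn; "sh2add %r25,%r26,%r1" computes
   %r1 = (%r25 << 2) + %r26.  Other scales need a separate shift.  */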
8792
8793/* Return TRUE if INSN branches forward.  */
8794
8795static bool
8796forward_branch_p (rtx_insn *insn)
8797{
8798  rtx lab = JUMP_LABEL (insn);
8799
8800  /* The INSN must have a jump label.  */
8801  gcc_assert (lab != NULL_RTX);
8802
8803  if (INSN_ADDRESSES_SET_P ())
8804    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8805
8806  while (insn)
8807    {
8808      if (insn == lab)
8809	return true;
8810      else
8811	insn = NEXT_INSN (insn);
8812    }
8813
8814  return false;
8815}
8816
8817/* Output an unconditional move and branch insn.  */
8818
8819const char *
8820pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8821{
8822  int length = get_attr_length (insn);
8823
8824  /* These are the cases in which we win.  */
8825  if (length == 4)
8826    return "mov%I1b,tr %1,%0,%2";
8827
8828  /* None of the following cases win, but they don't lose either.  */
8829  if (length == 8)
8830    {
8831      if (dbr_sequence_length () == 0)
8832	{
8833	  /* Nothing in the delay slot, fake it by putting the combined
8834	     insn (the copy or add) in the delay slot of a bl.  */
8835	  if (GET_CODE (operands[1]) == CONST_INT)
8836	    return "b %2\n\tldi %1,%0";
8837	  else
8838	    return "b %2\n\tcopy %1,%0";
8839	}
8840      else
8841	{
8842	  /* Something in the delay slot, but we've got a long branch.  */
8843	  if (GET_CODE (operands[1]) == CONST_INT)
8844	    return "ldi %1,%0\n\tb %2";
8845	  else
8846	    return "copy %1,%0\n\tb %2";
8847	}
8848    }
8849
8850  if (GET_CODE (operands[1]) == CONST_INT)
8851    output_asm_insn ("ldi %1,%0", operands);
8852  else
8853    output_asm_insn ("copy %1,%0", operands);
8854  return pa_output_lbranch (operands[2], insn, 1);
8855}
8856
8857/* Output an unconditional add and branch insn.  */
8858
8859const char *
8860pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8861{
8862  int length = get_attr_length (insn);
8863
8864  /* To make life easy we want operand0 to be the shared input/output
8865     operand and operand1 to be the readonly operand.  */
8866  if (operands[0] == operands[1])
8867    operands[1] = operands[2];
8868
8869  /* These are the cases in which we win.  */
8870  if (length == 4)
8871    return "add%I1b,tr %1,%0,%3";
8872
8873  /* None of the following cases win, but they don't lose either.  */
8874  if (length == 8)
8875    {
8876      if (dbr_sequence_length () == 0)
8877	/* Nothing in the delay slot, fake it by putting the combined
8878	   insn (the copy or add) in the delay slot of a bl.  */
8879	return "b %3\n\tadd%I1 %1,%0,%0";
8880      else
8881	/* Something in the delay slot, but we've got a long branch.  */
8882	return "add%I1 %1,%0,%0\n\tb %3";
8883    }
8884
8885  output_asm_insn ("add%I1 %1,%0,%0", operands);
8886  return pa_output_lbranch (operands[3], insn, 1);
8887}
8888
8889/* We use this hook to perform a PA specific optimization which is difficult
8890   to do in earlier passes.  */
8891
8892static void
8893pa_reorg (void)
8894{
8895  remove_useless_addtr_insns (1);
8896
8897  if (pa_cpu < PROCESSOR_8000)
8898    pa_combine_instructions ();
8899}
8900
8901/* The PA has a number of odd instructions which can perform multiple
8902   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8903   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
8905   two outputs would take two slots in the reorder buffers.
8906
8907   This routine finds instructions which can be combined and combines
8908   them.  We only support some of the potential combinations, and we
8909   only try common ways to find suitable instructions.
8910
8911      * addb can add two registers or a register and a small integer
8912      and jump to a nearby (+-8k) location.  Normally the jump to the
8913      nearby location is conditional on the result of the add, but by
8914      using the "true" condition we can make the jump unconditional.
8915      Thus addb can perform two independent operations in one insn.
8916
8917      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.
8919
8920      * fmpyadd and fmpysub can perform a FP multiply and either an
8921      FP add or FP sub if the operands of the multiply and add/sub are
8922      independent (there are other minor restrictions).  Note both
8923      the fmpy and fadd/fsub can in theory move to better spots according
8924      to data dependencies, but for now we require the fmpy stay at a
8925      fixed location.
8926
8927      * Many of the memory operations can perform pre & post updates
8928      of index registers.  GCC's pre/post increment/decrement addressing
8929      is far too simple to take advantage of all the possibilities.  This
8930      pass may not be suitable since those insns may not be independent.
8931
8932      * comclr can compare two ints or an int and a register, nullify
8933      the following instruction and zero some other register.  This
8934      is more difficult to use as it's harder to find an insn which
8935      will generate a comclr than finding something like an unconditional
8936      branch.  (conditional moves & long branches create comclr insns).
8937
8938      * Most arithmetic operations can conditionally skip the next
8939      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is a few insns away).  These are difficult to use due to the
8942      branch length restrictions.  */
8943
8944static void
8945pa_combine_instructions (void)
8946{
8947  rtx_insn *anchor;
8948
8949  /* This can get expensive since the basic algorithm is on the
8950     order of O(n^2) (or worse).  Only do it for -O2 or higher
8951     levels of optimization.  */
8952  if (optimize < 2)
8953    return;
8954
8955  /* Walk down the list of insns looking for "anchor" insns which
8956     may be combined with "floating" insns.  As the name implies,
8957     "anchor" instructions don't move, while "floating" insns may
8958     move around.  */
8959  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8960  rtx_insn *new_rtx = make_insn_raw (par);
8961
8962  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8963    {
8964      enum attr_pa_combine_type anchor_attr;
8965      enum attr_pa_combine_type floater_attr;
8966
8967      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8968	 Also ignore any special USE insns.  */
8969      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8970	  || GET_CODE (PATTERN (anchor)) == USE
8971	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
8972	continue;
8973
8974      anchor_attr = get_attr_pa_combine_type (anchor);
8975      /* See if anchor is an insn suitable for combination.  */
8976      if (anchor_attr == PA_COMBINE_TYPE_FMPY
8977	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8978	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8979	      && ! forward_branch_p (anchor)))
8980	{
8981	  rtx_insn *floater;
8982
8983	  for (floater = PREV_INSN (anchor);
8984	       floater;
8985	       floater = PREV_INSN (floater))
8986	    {
8987	      if (NOTE_P (floater)
8988		  || (NONJUMP_INSN_P (floater)
8989		      && (GET_CODE (PATTERN (floater)) == USE
8990			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
8991		continue;
8992
8993	      /* Anything except a regular INSN will stop our search.  */
8994	      if (! NONJUMP_INSN_P (floater))
8995		{
8996		  floater = NULL;
8997		  break;
8998		}
8999
9000	      /* See if FLOATER is suitable for combination with the
9001		 anchor.  */
9002	      floater_attr = get_attr_pa_combine_type (floater);
9003	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9004		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9005		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9006		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9007		{
9008		  /* If ANCHOR and FLOATER can be combined, then we're
9009		     done with this pass.  */
9010		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9011					SET_DEST (PATTERN (floater)),
9012					XEXP (SET_SRC (PATTERN (floater)), 0),
9013					XEXP (SET_SRC (PATTERN (floater)), 1)))
9014		    break;
9015		}
9016
9017	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9018		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9019		{
9020		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9021		    {
9022		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9023					    SET_DEST (PATTERN (floater)),
9024					XEXP (SET_SRC (PATTERN (floater)), 0),
9025					XEXP (SET_SRC (PATTERN (floater)), 1)))
9026			break;
9027		    }
9028		  else
9029		    {
9030		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9031					    SET_DEST (PATTERN (floater)),
9032					    SET_SRC (PATTERN (floater)),
9033					    SET_SRC (PATTERN (floater))))
9034			break;
9035		    }
9036		}
9037	    }
9038
9039	  /* If we didn't find anything on the backwards scan try forwards.  */
9040	  if (!floater
9041	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9042		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9043	    {
9044	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9045		{
9046		  if (NOTE_P (floater)
9047		      || (NONJUMP_INSN_P (floater)
9048			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
		    continue;
9052
9053		  /* Anything except a regular INSN will stop our search.  */
9054		  if (! NONJUMP_INSN_P (floater))
9055		    {
9056		      floater = NULL;
9057		      break;
9058		    }
9059
9060		  /* See if FLOATER is suitable for combination with the
9061		     anchor.  */
9062		  floater_attr = get_attr_pa_combine_type (floater);
9063		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9064		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9065		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9066			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9067		    {
9068		      /* If ANCHOR and FLOATER can be combined, then we're
9069			 done with this pass.  */
9070		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9071					    SET_DEST (PATTERN (floater)),
9072					    XEXP (SET_SRC (PATTERN (floater)),
9073						  0),
9074					    XEXP (SET_SRC (PATTERN (floater)),
9075						  1)))
9076			break;
9077		    }
9078		}
9079	    }
9080
9081	  /* FLOATER will be nonzero if we found a suitable floating
9082	     insn for combination with ANCHOR.  */
9083	  if (floater
9084	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9085		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9086	    {
9087	      /* Emit the new instruction and delete the old anchor.  */
9088	      emit_insn_before (gen_rtx_PARALLEL
9089				(VOIDmode,
9090				 gen_rtvec (2, PATTERN (anchor),
9091					    PATTERN (floater))),
9092				anchor);
9093
9094	      SET_INSN_DELETED (anchor);
9095
9096	      /* Emit a special USE insn for FLOATER, then delete
9097		 the floating insn.  */
9098	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9099	      delete_insn (floater);
9100
9101	      continue;
9102	    }
9103	  else if (floater
9104		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9105	    {
9106	      rtx temp;
9107	      /* Emit the new_jump instruction and delete the old anchor.  */
9108	      temp
9109		= emit_jump_insn_before (gen_rtx_PARALLEL
9110					 (VOIDmode,
9111					  gen_rtvec (2, PATTERN (anchor),
9112						     PATTERN (floater))),
9113					 anchor);
9114
9115	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9116	      SET_INSN_DELETED (anchor);
9117
9118	      /* Emit a special USE insn for FLOATER, then delete
9119		 the floating insn.  */
9120	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9121	      delete_insn (floater);
9122	      continue;
9123	    }
9124	}
9125    }
9126}
9127
9128static int
9129pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9130		  int reversed, rtx dest,
9131		  rtx src1, rtx src2)
9132{
9133  int insn_code_number;
9134  rtx_insn *start, *end;
9135
9136  /* Create a PARALLEL with the patterns of ANCHOR and
9137     FLOATER, try to recognize it, then test constraints
9138     for the resulting pattern.
9139
9140     If the pattern doesn't match or the constraints
9141     aren't met keep searching for a suitable floater
9142     insn.  */
9143  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9144  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9145  INSN_CODE (new_rtx) = -1;
9146  insn_code_number = recog_memoized (new_rtx);
9147  basic_block bb = BLOCK_FOR_INSN (anchor);
9148  if (insn_code_number < 0
9149      || (extract_insn (new_rtx),
9150	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9151    return 0;
9152
9153  if (reversed)
9154    {
9155      start = anchor;
9156      end = floater;
9157    }
9158  else
9159    {
9160      start = floater;
9161      end = anchor;
9162    }
9163
  /* There are up to three operands to consider: one
     output and two inputs.
9166
9167     The output must not be used between FLOATER & ANCHOR
9168     exclusive.  The inputs must not be set between
9169     FLOATER and ANCHOR exclusive.  */
9170
9171  if (reg_used_between_p (dest, start, end))
9172    return 0;
9173
9174  if (reg_set_between_p (src1, start, end))
9175    return 0;
9176
9177  if (reg_set_between_p (src2, start, end))
9178    return 0;
9179
9180  /* If we get here, then everything is good.  */
9181  return 1;
9182}
9183
9184/* Return nonzero if references for INSN are delayed.
9185
9186   Millicode insns are actually function calls with some special
9187   constraints on arguments and register usage.
9188
9189   Millicode calls always expect their arguments in the integer argument
9190   registers, and always return their result in %r29 (ret1).  They
9191   are expected to clobber their arguments, %r1, %r29, and the return
9192   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9193
9194   This function tells reorg that the references to arguments and
9195   millicode calls do not appear to happen until after the millicode call.
9196   This allows reorg to put insns which set the argument registers into the
9197   delay slot of the millicode call -- thus they act more like traditional
9198   CALL_INSNs.
9199
9200   Note we cannot consider side effects of the insn to be delayed because
9201   the branch and link insn will clobber the return pointer.  If we happened
9202   to use the return pointer in the delay slot of the call, then we lose.
9203
9204   get_attr_type will try to recognize the given insn, so make sure to
9205   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9206   in particular.  */
9207int
9208pa_insn_refs_are_delayed (rtx_insn *insn)
9209{
9210  return ((NONJUMP_INSN_P (insn)
9211	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9212	   && GET_CODE (PATTERN (insn)) != USE
9213	   && GET_CODE (PATTERN (insn)) != CLOBBER
9214	   && get_attr_type (insn) == TYPE_MILLI));
9215}
9216
9217/* Promote the return value, but not the arguments.  */
9218
9219static machine_mode
9220pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9221                          machine_mode mode,
9222                          int *punsignedp ATTRIBUTE_UNUSED,
9223                          const_tree fntype ATTRIBUTE_UNUSED,
9224                          int for_return)
9225{
9226  if (for_return == 0)
9227    return mode;
9228  return promote_mode (type, mode, punsignedp);
9229}
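
/* Editor's example: a function returning "short" has its result
   widened via promote_mode to a full word in the return register,
   while a "short" argument is passed in its own mode; callers and
   callees thus agree on the return-value width without promoting
   every argument.  */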
9230
9231/* On the HP-PA the value is found in register(s) 28(-29), unless
9232   the mode is SF or DF. Then the value is returned in fr4 (32).
9233
9234   This must perform the same promotions as PROMOTE_MODE, else promoting
9235   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9236
9237   Small structures must be returned in a PARALLEL on PA64 in order
9238   to match the HP Compiler ABI.  */
9239
9240static rtx
9241pa_function_value (const_tree valtype,
9242                   const_tree func ATTRIBUTE_UNUSED,
9243                   bool outgoing ATTRIBUTE_UNUSED)
9244{
9245  machine_mode valmode;
9246
9247  if (AGGREGATE_TYPE_P (valtype)
9248      || TREE_CODE (valtype) == COMPLEX_TYPE
9249      || TREE_CODE (valtype) == VECTOR_TYPE)
9250    {
9251      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9252
9253      /* Handle aggregates that fit exactly in a word or double word.  */
9254      if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9255	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9256
9257      if (TARGET_64BIT)
9258	{
9259          /* Aggregates with a size less than or equal to 128 bits are
9260	     returned in GR 28(-29).  They are left justified.  The pad
9261	     bits are undefined.  Larger aggregates are returned in
9262	     memory.  */
9263	  rtx loc[2];
9264	  int i, offset = 0;
9265	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9266
9267	  for (i = 0; i < ub; i++)
9268	    {
9269	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9270					  gen_rtx_REG (DImode, 28 + i),
9271					  GEN_INT (offset));
9272	      offset += 8;
9273	    }
9274
9275	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9276	}
9277      else if (valsize > UNITS_PER_WORD)
9278	{
9279	  /* Aggregates 5 to 8 bytes in size are returned in general
9280	     registers r28-r29 in the same manner as other non
9281	     floating-point objects.  The data is right-justified and
9282	     zero-extended to 64 bits.  This is opposite to the normal
9283	     justification used on big endian targets and requires
9284	     special treatment.  */
9285	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9286				       gen_rtx_REG (DImode, 28), const0_rtx);
9287	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9288	}
9289    }
9290
9291  if ((INTEGRAL_TYPE_P (valtype)
9292       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9293      || POINTER_TYPE_P (valtype))
9294    valmode = word_mode;
9295  else
9296    valmode = TYPE_MODE (valtype);
9297
9298  if (TREE_CODE (valtype) == REAL_TYPE
9299      && !AGGREGATE_TYPE_P (valtype)
9300      && TYPE_MODE (valtype) != TFmode
9301      && !TARGET_SOFT_FLOAT)
9302    return gen_rtx_REG (valmode, 32);
9303
9304  return gen_rtx_REG (valmode, 28);
9305}
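
/* Worked examples (editor's note): a "double" result (without
   soft-float) comes back in %fr4, i.e. REG 32; a 12-byte struct on
   PA64 is returned left justified in %r28/%r29 through the two-entry
   PARALLEL above; a 6-byte struct on the 32-bit ABI is returned
   right justified via the single-entry PARALLEL case.  */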
9306
9307/* Implement the TARGET_LIBCALL_VALUE hook.  */
9308
9309static rtx
9310pa_libcall_value (machine_mode mode,
9311		  const_rtx fun ATTRIBUTE_UNUSED)
9312{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
9318}
9319
9320/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9321
9322static bool
9323pa_function_value_regno_p (const unsigned int regno)
9324{
9325  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
9327    return true;
9328
9329  return false;
9330}
9331
9332/* Update the data in CUM to advance over an argument
9333   of mode MODE and data type TYPE.
9334   (TYPE is null for libcalls where that information may not be available.)  */
9335
9336static void
9337pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9338			 const_tree type, bool named ATTRIBUTE_UNUSED)
9339{
9340  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9341  int arg_size = FUNCTION_ARG_SIZE (mode, type);
9342
9343  cum->nargs_prototype--;
9344  cum->words += (arg_size
9345		 + ((cum->words & 01)
9346		    && type != NULL_TREE
9347		    && arg_size > 1));
9348}
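
/* Worked example (editor's note, 32-bit ABI): with cum->words == 1, a
   DFmode argument (arg_size == 2) advances cum->words by 3, i.e. one
   word of padding to reach an even slot plus the two data words.  For
   a libcall (type == NULL_TREE) no padding is added.  */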
9349
9350/* Return the location of a parameter that is passed in a register or NULL
9351   if the parameter has any component that is passed in memory.
9352
   This is new code and will be pushed into the net sources after
   further testing.
9355
9356   ??? We might want to restructure this so that it looks more like other
9357   ports.  */
9358static rtx
9359pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9360		 const_tree type, bool named ATTRIBUTE_UNUSED)
9361{
9362  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9363  int max_arg_words = (TARGET_64BIT ? 8 : 4);
9364  int alignment = 0;
9365  int arg_size;
9366  int fpr_reg_base;
9367  int gpr_reg_base;
9368  rtx retval;
9369
9370  if (mode == VOIDmode)
9371    return NULL_RTX;
9372
9373  arg_size = FUNCTION_ARG_SIZE (mode, type);
9374
9375  /* If this arg would be passed partially or totally on the stack, then
9376     this routine should return zero.  pa_arg_partial_bytes will
9377     handle arguments which are split between regs and stack slots if
9378     the ABI mandates split arguments.  */
9379  if (!TARGET_64BIT)
9380    {
9381      /* The 32-bit ABI does not split arguments.  */
9382      if (cum->words + arg_size > max_arg_words)
9383	return NULL_RTX;
9384    }
9385  else
9386    {
9387      if (arg_size > 1)
9388	alignment = cum->words & 1;
9389      if (cum->words + alignment >= max_arg_words)
9390	return NULL_RTX;
9391    }
9392
9393  /* The 32bit ABIs and the 64bit ABIs are rather different,
9394     particularly in their handling of FP registers.  We might
9395     be able to cleverly share code between them, but I'm not
9396     going to bother in the hope that splitting them up results
9397     in code that is more easily understood.  */
9398
9399  if (TARGET_64BIT)
9400    {
9401      /* Advance the base registers to their current locations.
9402
9403         Remember, gprs grow towards smaller register numbers while
9404	 fprs grow to higher register numbers.  Also remember that
9405	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64 bits wide.  */
9407      gpr_reg_base = 26 - cum->words;
9408      fpr_reg_base = 32 + cum->words;
9409
9410      /* Arguments wider than one word and small aggregates need special
9411	 treatment.  */
9412      if (arg_size > 1
9413	  || mode == BLKmode
9414	  || (type && (AGGREGATE_TYPE_P (type)
9415		       || TREE_CODE (type) == COMPLEX_TYPE
9416		       || TREE_CODE (type) == VECTOR_TYPE)))
9417	{
9418	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9419	     and aggregates including complex numbers are aligned on
9420	     128-bit boundaries.  The first eight 64-bit argument slots
9421	     are associated one-to-one, with general registers r26
9422	     through r19, and also with floating-point registers fr4
9423	     through fr11.  Arguments larger than one word are always
9424	     passed in general registers.
9425
9426	     Using a PARALLEL with a word mode register results in left
9427	     justified data on a big-endian target.  */
9428
9429	  rtx loc[8];
9430	  int i, offset = 0, ub = arg_size;
9431
9432	  /* Align the base register.  */
9433	  gpr_reg_base -= alignment;
9434
9435	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9436	  for (i = 0; i < ub; i++)
9437	    {
9438	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9439					  gen_rtx_REG (DImode, gpr_reg_base),
9440					  GEN_INT (offset));
9441	      gpr_reg_base -= 1;
9442	      offset += 8;
9443	    }
9444
9445	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9446	}
    }
9448  else
9449    {
9450      /* If the argument is larger than a word, then we know precisely
9451	 which registers we must use.  */
9452      if (arg_size > 1)
9453	{
9454	  if (cum->words)
9455	    {
9456	      gpr_reg_base = 23;
9457	      fpr_reg_base = 38;
9458	    }
9459	  else
9460	    {
9461	      gpr_reg_base = 25;
9462	      fpr_reg_base = 34;
9463	    }
9464
9465	  /* Structures 5 to 8 bytes in size are passed in the general
9466	     registers in the same manner as other non floating-point
9467	     objects.  The data is right-justified and zero-extended
9468	     to 64 bits.  This is opposite to the normal justification
9469	     used on big endian targets and requires special treatment.
9470	     We now define BLOCK_REG_PADDING to pad these objects.
9471	     Aggregates, complex and vector types are passed in the same
9472	     manner as structures.  */
9473	  if (mode == BLKmode
9474	      || (type && (AGGREGATE_TYPE_P (type)
9475			   || TREE_CODE (type) == COMPLEX_TYPE
9476			   || TREE_CODE (type) == VECTOR_TYPE)))
9477	    {
9478	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9479					   gen_rtx_REG (DImode, gpr_reg_base),
9480					   const0_rtx);
9481	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9482	    }
9483	}
9484      else
9485        {
9486	   /* We have a single word (32 bits).  A simple computation
9487	      will get us the register #s we need.  */
9488	   gpr_reg_base = 26 - cum->words;
9489	   fpr_reg_base = 32 + 2 * cum->words;
9490	}
9491    }
9492
9493  /* Determine if the argument needs to be passed in both general and
9494     floating point registers.  */
9495  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9496       /* If we are doing soft-float with portable runtime, then there
9497	  is no need to worry about FP regs.  */
9498       && !TARGET_SOFT_FLOAT
9499       /* The parameter must be some kind of scalar float, else we just
9500	  pass it in integer registers.  */
9501       && GET_MODE_CLASS (mode) == MODE_FLOAT
9502       /* The target function must not have a prototype.  */
9503       && cum->nargs_prototype <= 0
9504       /* libcalls do not need to pass items in both FP and general
9505	  registers.  */
9506       && type != NULL_TREE
9507       /* All this hair applies to "outgoing" args only.  This includes
9508	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9509       && !cum->incoming)
9510      /* Also pass outgoing floating arguments in both registers in indirect
9511	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify the argument locations in static functions.  */
9513      || (!TARGET_64BIT
9514	  && !TARGET_GAS
9515	  && !cum->incoming
9516	  && cum->indirect
9517	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9518    {
9519      retval
9520	= gen_rtx_PARALLEL
9521	    (mode,
9522	     gen_rtvec (2,
9523			gen_rtx_EXPR_LIST (VOIDmode,
9524					   gen_rtx_REG (mode, fpr_reg_base),
9525					   const0_rtx),
9526			gen_rtx_EXPR_LIST (VOIDmode,
9527					   gen_rtx_REG (mode, gpr_reg_base),
9528					   const0_rtx)));
9529    }
9530  else
9531    {
9532      /* See if we should pass this parameter in a general register.  */
9533      if (TARGET_SOFT_FLOAT
9534	  /* Indirect calls in the normal 32bit ABI require all arguments
9535	     to be passed in general registers.  */
9536	  || (!TARGET_PORTABLE_RUNTIME
9537	      && !TARGET_64BIT
9538	      && !TARGET_ELF32
9539	      && cum->indirect)
9540	  /* If the parameter is not a scalar floating-point parameter,
9541	     then it belongs in GPRs.  */
9542	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9543	  /* Structure with single SFmode field belongs in GPR.  */
9544	  || (type && AGGREGATE_TYPE_P (type)))
9545	retval = gen_rtx_REG (mode, gpr_reg_base);
9546      else
9547	retval = gen_rtx_REG (mode, fpr_reg_base);
9548    }
9549  return retval;
9550}
9551
9552/* Arguments larger than one word are double word aligned.  */
9553
9554static unsigned int
9555pa_function_arg_boundary (machine_mode mode, const_tree type)
9556{
9557  bool singleword = (type
9558		     ? (integer_zerop (TYPE_SIZE (type))
9559			|| !TREE_CONSTANT (TYPE_SIZE (type))
9560			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9561		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9562
9563  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9564}
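
/* Editor's examples: an "int" (one word) gets PARM_BOUNDARY; a
   "double" under the 32-bit ABI (two words) gets MAX_PARM_BOUNDARY,
   i.e. a double-word aligned slot; zero-sized and variable-sized
   types are treated as a single word.  */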
9565
9566/* If this arg would be passed totally in registers or totally on the stack,
9567   then this routine should return zero.  */
9568
9569static int
9570pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9571		      tree type, bool named ATTRIBUTE_UNUSED)
9572{
9573  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9574  unsigned int max_arg_words = 8;
9575  unsigned int offset = 0;
9576
9577  if (!TARGET_64BIT)
9578    return 0;
9579
9580  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9581    offset = 1;
9582
9583  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9584    /* Arg fits fully into registers.  */
9585    return 0;
9586  else if (cum->words + offset >= max_arg_words)
9587    /* Arg fully on the stack.  */
9588    return 0;
9589  else
9590    /* Arg is split.  */
9591    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9592}
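
/* Worked examples (editor's note, PA64): a two-word argument at
   cum->words == 7 is padded to slot 8 and so lands entirely on the
   stack (0 is returned); a three-word aggregate at cum->words == 6 is
   split, with (8 - 6) * UNITS_PER_WORD == 16 bytes in registers and
   the remaining word on the stack.  */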
9593
9594
9595/* A get_unnamed_section callback for switching to the text section.
9596
9597   This function is only used with SOM.  Because we don't support
9598   named subspaces, we can only create a new subspace or switch back
9599   to the default text subspace.  */
9600
9601static void
9602som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9603{
9604  gcc_assert (TARGET_SOM);
9605  if (TARGET_GAS)
9606    {
9607      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9608	{
9609	  /* We only want to emit a .nsubspa directive once at the
9610	     start of the function.  */
9611	  cfun->machine->in_nsubspa = 1;
9612
9613	  /* Create a new subspace for the text.  This provides
9614	     better stub placement and one-only functions.  */
9615	  if (cfun->decl
9616	      && DECL_ONE_ONLY (cfun->decl)
9617	      && !DECL_WEAK (cfun->decl))
9618	    {
9619	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9620				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9621				     "ACCESS=44,SORT=24,COMDAT");
9622	      return;
9623	    }
9624	}
9625      else
9626	{
9627	  /* There isn't a current function or the body of the current
9628	     function has been completed.  So, we are changing to the
9629	     text section to output debugging information.  Thus, we
9630	     need to forget that we are in the text section so that
9631	     varasm.c will call us when text_section is selected again.  */
9632	  gcc_assert (!cfun || !cfun->machine
9633		      || cfun->machine->in_nsubspa == 2);
9634	  in_section = NULL;
9635	}
9636      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9637      return;
9638    }
9639  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9640}
9641
9642/* A get_unnamed_section callback for switching to comdat data
9643   sections.  This function is only used with SOM.  */
9644
9645static void
9646som_output_comdat_data_section_asm_op (const void *data)
9647{
9648  in_section = NULL;
9649  output_section_asm_op (data);
9650}
9651
9652/* Implement TARGET_ASM_INITIALIZE_SECTIONS  */
9653
9654static void
9655pa_som_asm_init_sections (void)
9656{
9657  text_section
9658    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9659
9660  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9661     is not being generated.  */
9662  som_readonly_data_section
9663    = get_unnamed_section (0, output_section_asm_op,
9664			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9665
9666  /* When secondary definitions are not supported, SOM makes readonly
9667     data one-only by creating a new $LIT$ subspace in $TEXT$ with
9668     the comdat flag.  */
9669  som_one_only_readonly_data_section
9670    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9671			   "\t.SPACE $TEXT$\n"
9672			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9673			   "ACCESS=0x2c,SORT=16,COMDAT");
9674
9675
9676  /* When secondary definitions are not supported, SOM makes data one-only
9677     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9678  som_one_only_data_section
9679    = get_unnamed_section (SECTION_WRITE,
9680			   som_output_comdat_data_section_asm_op,
9681			   "\t.SPACE $PRIVATE$\n"
9682			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9683			   "ACCESS=31,SORT=24,COMDAT");
9684
9685  if (flag_tm)
9686    som_tm_clone_table_section
9687      = get_unnamed_section (0, output_section_asm_op,
9688			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9689
9690  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9691     which reference data within the $TEXT$ space (for example constant
9692     strings in the $LIT$ subspace).
9693
9694     The assemblers (GAS and HP as) both have problems with handling
9695     the difference of two symbols which is the other correct way to
9696     reference constant data during PIC code generation.
9697
9698     So, there's no way to reference constant data which is in the
9699     $TEXT$ space during PIC generation.  Instead place all constant
9700     data into the $PRIVATE$ subspace (this reduces sharing, but it
9701     works correctly).  */
9702  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9703
9704  /* We must not have a reference to an external symbol defined in a
9705     shared library in a readonly section, else the SOM linker will
9706     complain.
9707
9708     So, we force exception information into the data section.  */
9709  exception_section = data_section;
9710}
9711
9712/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9713
9714static section *
9715pa_som_tm_clone_table_section (void)
9716{
9717  return som_tm_clone_table_section;
9718}
9719
9720/* On hpux10, the linker will give an error if we have a reference
9721   in the read-only data section to a symbol defined in a shared
9722   library.  Therefore, expressions that might require a reloc can
9723   not be placed in the read-only data section.  */
9724
9725static section *
9726pa_select_section (tree exp, int reloc,
9727		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9728{
9729  if (TREE_CODE (exp) == VAR_DECL
9730      && TREE_READONLY (exp)
9731      && !TREE_THIS_VOLATILE (exp)
9732      && DECL_INITIAL (exp)
9733      && (DECL_INITIAL (exp) == error_mark_node
9734          || TREE_CONSTANT (DECL_INITIAL (exp)))
9735      && !reloc)
9736    {
9737      if (TARGET_SOM
9738	  && DECL_ONE_ONLY (exp)
9739	  && !DECL_WEAK (exp))
9740	return som_one_only_readonly_data_section;
9741      else
9742	return readonly_data_section;
9743    }
9744  else if (CONSTANT_CLASS_P (exp) && !reloc)
9745    return readonly_data_section;
9746  else if (TARGET_SOM
9747	   && TREE_CODE (exp) == VAR_DECL
9748	   && DECL_ONE_ONLY (exp)
9749	   && !DECL_WEAK (exp))
9750    return som_one_only_data_section;
9751  else
9752    return data_section;
9753}
9754
9755/* Implement pa_reloc_rw_mask.  */
9756
9757static int
9758pa_reloc_rw_mask (void)
9759{
9760  /* We force (const (plus (symbol) (const_int))) to memory when the
9761     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
9762     handle this construct in read-only memory and we want to avoid
9763     this for ELF.  So, we always force an RTX needing relocation to
9764     the data section.  */
9765  return 3;
9766}
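
/* Editor's note (assumed semantics of the hook): the return value is a
   bit mask, bit 0 covering relocations that resolve locally and bit 1
   those against global symbols; returning 3 therefore keeps anything
   needing either kind of relocation out of read-only sections.  */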
9767
9768static void
9769pa_globalize_label (FILE *stream, const char *name)
9770{
9771  /* We only handle DATA objects here, functions are globalized in
9772     ASM_DECLARE_FUNCTION_NAME.  */
9773  if (! FUNCTION_NAME_P (name))
9774  {
9775    fputs ("\t.EXPORT ", stream);
9776    assemble_name (stream, name);
9777    fputs (",DATA\n", stream);
9778  }
9779}
9780
9781/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9782
9783static rtx
9784pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9785		     int incoming ATTRIBUTE_UNUSED)
9786{
9787  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9788}
9789
9790/* Worker function for TARGET_RETURN_IN_MEMORY.  */
9791
9792bool
9793pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9794{
9795  /* SOM ABI says that objects larger than 64 bits are returned in memory.
9796     PA64 ABI says that objects larger than 128 bits are returned in memory.
9797     Note, int_size_in_bytes can return -1 if the size of the object is
9798     variable or larger than the maximum value that can be expressed as
9799     a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9800     simplest way to handle variable and empty types is to pass them in
9801     memory.  This avoids problems in defining the boundaries of argument
9802     slots, allocating registers, etc.  */
9803  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9804	  || int_size_in_bytes (type) <= 0);
9805}
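
/* Editor's examples: a 12-byte struct is returned in memory on the
   32-bit SOM ABI (12 > 8) but in registers on PA64 (12 <= 16); a
   variable-sized or empty type reports a non-positive size and is
   always returned in memory.  */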
9806
9807/* Structure to hold declaration and name of external symbols that are
9808   emitted by GCC.  We generate a vector of these symbols and output them
9809   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9810   This avoids putting out names that are never really used.  */
9811
9812typedef struct GTY(()) extern_symbol
9813{
9814  tree decl;
9815  const char *name;
9816} extern_symbol;
9817
9818/* Define gc'd vector type for extern_symbol.  */
9819
9820/* Vector of extern_symbol pointers.  */
9821static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9822
9823#ifdef ASM_OUTPUT_EXTERNAL_REAL
9824/* Mark DECL (name NAME) as an external reference (assembler output
9825   file FILE).  This saves the names to output at the end of the file
9826   if actually referenced.  */
9827
9828void
9829pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9830{
9831  gcc_assert (file == asm_out_file);
9832  extern_symbol p = {decl, name};
9833  vec_safe_push (extern_symbols, p);
9834}
9835
9836/* Output text required at the end of an assembler file.
9837   This includes deferred plabels and .import directives for
9838   all external symbols that were actually referenced.  */
9839
9840static void
9841pa_hpux_file_end (void)
9842{
9843  unsigned int i;
9844  extern_symbol *p;
9845
9846  if (!NO_DEFERRED_PROFILE_COUNTERS)
9847    output_deferred_profile_counters ();
9848
9849  output_deferred_plabels ();
9850
9851  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9852    {
9853      tree decl = p->decl;
9854
9855      if (!TREE_ASM_WRITTEN (decl)
9856	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9857	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9858    }
9859
9860  vec_free (extern_symbols);
9861}
9862#endif
9863
9864/* Return true if a change from mode FROM to mode TO for a register
9865   in register class RCLASS is invalid.  */
9866
9867bool
9868pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9869			     enum reg_class rclass)
9870{
9871  if (from == to)
9872    return false;
9873
9874  /* Reject changes to/from complex and vector modes.  */
9875  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9876      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9877    return true;
9878
9879  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9880    return false;
9881
9882  /* There is no way to load QImode or HImode values directly from
9883     memory.  SImode loads to the FP registers are not zero extended.
9884     On the 64-bit target, this conflicts with the definition of
9885     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
9886     with different sizes in the floating-point registers.  */
9887  if (MAYBE_FP_REG_CLASS_P (rclass))
9888    return true;
9889
9890  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9891     in specific sets of registers.  Thus, we cannot allow changing
9892     to a larger mode when it's larger than a word.  */
9893  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9894      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9895    return true;
9896
9897  return false;
9898}
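
/* Editor's examples (32-bit target): narrowing DImode to SImode in a
   general register is allowed; widening SImode to DImode is rejected
   because the wider mode is confined to specific register pairs; and
   any size-changing subreg in a (maybe) floating-point class is
   rejected outright.  */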
9899
9900/* Returns TRUE if it is a good idea to tie two pseudo registers
9901   when one has mode MODE1 and one has mode MODE2.
9902   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9903   for any hard reg, then this must be FALSE for correct output.
9904
9905   We should return FALSE for QImode and HImode because these modes
9906   are not ok in the floating-point registers.  However, this prevents
9907   tieing these modes to SImode and DImode in the general registers.
   tying these modes to SImode and DImode in the general registers.
9909   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9910   in the floating-point registers.  */
9911
9912bool
9913pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9914{
9915  /* Don't tie modes in different classes.  */
9916  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9917    return false;
9918
9919  return true;
9920}
9921
9922
9923/* Length in units of the trampoline instruction code.  */
9924
9925#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9926
9927
9928/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.
9930
9931   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9932   and then branches to the specified routine.
9933
9934   This code template is copied from text segment to stack location
9935   and then patched with pa_trampoline_init to contain valid values,
9936   and then entered as a subroutine.
9937
9938   It is best to keep this as small as possible to avoid having to
9939   flush multiple lines in the cache.  */
9940
9941static void
9942pa_asm_trampoline_template (FILE *f)
9943{
9944  if (!TARGET_64BIT)
9945    {
9946      fputs ("\tldw	36(%r22),%r21\n", f);
9947      fputs ("\tbb,>=,n	%r21,30,.+16\n", f);
9948      if (ASSEMBLER_DIALECT == 0)
9949	fputs ("\tdepi	0,31,2,%r21\n", f);
9950      else
9951	fputs ("\tdepwi	0,31,2,%r21\n", f);
9952      fputs ("\tldw	4(%r21),%r19\n", f);
9953      fputs ("\tldw	0(%r21),%r21\n", f);
9954      if (TARGET_PA_20)
9955	{
9956	  fputs ("\tbve	(%r21)\n", f);
9957	  fputs ("\tldw	40(%r22),%r29\n", f);
9958	  fputs ("\t.word	0\n", f);
9959	  fputs ("\t.word	0\n", f);
9960	}
9961      else
9962	{
9963	  fputs ("\tldsid	(%r21),%r1\n", f);
9964	  fputs ("\tmtsp	%r1,%sr0\n", f);
9965	  fputs ("\tbe	0(%sr0,%r21)\n", f);
9966	  fputs ("\tldw	40(%r22),%r29\n", f);
9967	}
9968      fputs ("\t.word	0\n", f);
9969      fputs ("\t.word	0\n", f);
9970      fputs ("\t.word	0\n", f);
9971      fputs ("\t.word	0\n", f);
9972    }
9973  else
9974    {
9975      fputs ("\t.dword 0\n", f);
9976      fputs ("\t.dword 0\n", f);
9977      fputs ("\t.dword 0\n", f);
9978      fputs ("\t.dword 0\n", f);
9979      fputs ("\tmfia	%r31\n", f);
9980      fputs ("\tldd	24(%r31),%r1\n", f);
9981      fputs ("\tldd	24(%r1),%r27\n", f);
9982      fputs ("\tldd	16(%r1),%r1\n", f);
9983      fputs ("\tbve	(%r1)\n", f);
9984      fputs ("\tldd	32(%r31),%r31\n", f);
9985      fputs ("\t.dword 0  ; fptr\n", f);
9986      fputs ("\t.dword 0  ; static link\n", f);
9987    }
9988}
9989
9990/* Emit RTL insns to initialize the variable parts of a trampoline.
9991   FNADDR is an RTX for the address of the function's pure code.
9992   CXT is an RTX for the static chain value for the function.
9993
9994   Move the function address to the trampoline template at offset 36.
9995   Move the static chain value to trampoline template at offset 40.
9996   Move the trampoline address to trampoline template at offset 44.
9997   Move r19 to trampoline template at offset 48.  The latter two
9998   words create a plabel for the indirect call to the trampoline.
9999
10000   A similar sequence is used for the 64-bit port but the plabel is
10001   at the beginning of the trampoline.
10002
10003   Finally, the cache entries for the trampoline code are flushed.
10004   This is necessary to ensure that the trampoline instruction sequence
10005   is written to memory prior to any attempts at prefetching the code
10006   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.
   The offset is biased by 2 so that bit 30 is set in the resulting
   pointer, marking it as a plabel for indirect calls.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
  return addr;
}

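/* Implement TARGET_DELEGITIMIZE_ADDRESS.  Undo the PIC DLT indirection
   wrapped in UNSPEC_DLTIND14R so that later passes can again see the
   underlying symbol.  */
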
static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

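/* Implement TARGET_INTERNAL_ARG_POINTER.  */
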
static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

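/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
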
static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
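      /* PA 1.0 has fewer floating-point registers than PA 1.1 and no
	 access to their 32-bit halves; mark the unavailable ones as
	 fixed.  */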
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
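      /* No FP registers may be used with -mdisable-fpregs or a
	 soft-float ABI.  */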
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
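  /* Keep the PIC register fixed so PIC code can always address the
     linkage table.  */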
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  Map the 'q' suffix on floating
   constants to TFmode when the HP-UX long double library is in use.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}

/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory, but two of those are memory accesses.  It may
   be better to increase the allowed range for CONST_INTS.  We may also
   be able to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (INTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}

/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed memory
   operand.  As we won't need to canonicalize the operands if the
   REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant-pool, because such addresses can actually be
   output as REG+SMALLINT.  */
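
/* As an illustration, addresses of the following forms are accepted,
   subject to the mode and register checks described above:

	(mem:SI (reg %r26))				REG
	(mem:SI (plus (reg %r26) (const_int 40)))	REG+SMALLINT
	(mem:SF (plus (reg %r25) (reg %r26)))		INDEX+BASE
	(mem:SF (plus (mult (reg %r25) (const_int 4))
		      (reg %r26)))			REG+(REG*SCALE)  */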

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */
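
/* For instance, with a float mode and 5-bit displacements (mask 0x1f),
   an address such as (plus (reg X) (const_int 0x123)) becomes

	offset    = 0x123
	newoffset = 0x123 & ~0x1f = 0x120   (0x3 < 0x10, so round down)
	ad        = (plus (plus (reg X) (const_int 0x120)) (const_int 0x3))

   The inner PLUS is reloaded into a base register, and the remaining
   displacement 0x3 fits in the 5-bit field of the floating-point load
   and store instructions.  */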

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}

/* Output address vector.  */
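
/* With GAS, the vector is bracketed by .begin_brtab and .end_brtab so
   the assembler knows the words form a branch table.  Schematically,
   a three-entry vector emits something like

	L$0040:
		.begin_brtab
		.word	L$0041
		.word	L$0042
		.word	L$0043
		.end_brtab

   where the element syntax comes from ASM_OUTPUT_ADDR_VEC_ELT.  */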

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* This is a helper function for the other atomic operations.  It emits
   a loop containing SEQ that iterates until a compare-and-swap operation
   at the end succeeds.  MEM is the memory to be modified.  SEQ is a set
   of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

        cmp_reg = mem;
      label:
        old_reg = cmp_reg;
        seq;
        (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
        if (!success)
          goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
                                       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
                                       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
                           GET_MODE (success), 1, label, 0);
  return true;
}

/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents
   of *MEM are returned, using TARGET if possible.  No memory model is
   required since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
        target = gen_reg_rtx (mode);
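      /* An exchange needs no intermediate computation: passing a null
	 SEQ makes the loop store VAL as the new value while TARGET
	 receives the old contents of *MEM.  */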
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
        return target;
    }

  return NULL_RTX;
}

#include "gt-pa.h"
