/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

/* Forward function declarations.  */
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static int arm_size_return_regs (void);
#ifndef AOF_ASSEMBLER
static bool arm_assemble_integer (rtx, unsigned int, int);
#endif
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static int current_file_function_operand (rtx);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (tree, tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int);
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

#ifdef AOF_ASSEMBLER
static void aof_globalize_label (FILE *, const char *);
static void aof_dump_imports (FILE *);
static void aof_dump_pic_table (FILE *);
static void aof_file_start (void);
static void aof_file_end (void);
static void aof_asm_init_sections (void);
#endif
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, tree, bool);
static bool arm_promote_prototypes (tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (tree);
static bool arm_must_pass_in_stack (enum machine_mode, tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);


/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#ifdef AOF_ASSEMBLER
#undef  TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tDCB\t"
#undef  TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aof_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END aof_file_end
#else
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer
#endif

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

/* This will be overridden in arm_override_options.  */
#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_tls_referenced_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden on the command line.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse the -mstructure-size-boundary command line option.  */
int    arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)	      /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)	      /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)	      /* XScale */
#define FL_CIRRUS     (1 << 11)	      /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF	      (1 << 14)	      /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */

#define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2	0
#define FL_FOR_ARCH3	FL_MODE32
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
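/* Expanding the definitions above: for example,
   FL_FOR_ARCH5TE == (FL_MODE32 | FL_ARCH3M | FL_ARCH4
		      | FL_ARCH5 | FL_ARCH5E | FL_THUMB).  */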

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM Architecture 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output; this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
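/* The table is ordered so that a condition code's inverse is found at
   the index with the low bit flipped: "eq"/"ne", "cs"/"cc", "hi"/"ls",
   and so on.  */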

#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC |             FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC |             FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC |             FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC |             FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC |             FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC |             FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC |             FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,	  "6K",  FL_CO_PROC |             FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC |             FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC |             FL_FOR_ARCH6ZK, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string	  name            processors  */
  { NULL,	"-mcpu=",	all_cores  },
  { NULL,	"-march=",	all_architectures },
  { NULL,	"-mtune=",	all_cores }
};
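/* For example, "-march=armv5te" leaves a pointer to the string
   "armv5te" in arm_select[1].string (see arm_handle_option below);
   arm_override_options then matches it against all_architectures.  */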

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};


/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",	FPUTYPE_FPA},
  {"fpe2",	FPUTYPE_FPA_EMU2},
  {"fpe3",	FPUTYPE_FPA_EMU3},
  {"maverick",	FPUTYPE_MAVERICK},
  {"vfp",	FPUTYPE_VFP}
};


/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum arm_fp_model fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,		/* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,	/* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP		/* FPUTYPE_VFP  */
};


struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};


/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};


struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};


/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",   ARM_ABI_ATPCS},
  {"aapcs",   ARM_ABI_AAPCS},
  {"iwmmxt",  ARM_ABI_IWMMXT},
  {"aapcs-linux",   ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both operands must be known
   to be valid.  */
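/* For example, emit_set_insn (reg, GEN_INT (0)) emits a single
   (set (reg) (const_int 0)) insn.  */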
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
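/* For example, bit_count (0x2a) == 3.  */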
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
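  /* For example, a DImode "a / b" thus becomes a call to
     __aeabi_ldivmod whose quotient is returned in {r0, r1}; the
     remainder, which the run-time ABI returns in {r2, r3}, is simply
     ignored.  */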

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
	  const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int        sought;
      enum processor_type cpu;

      cpu = TARGET_CPU_DEFAULT;
      if (cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  cpu = SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (cpu == arm_none)
	    cpu = arm6;
	}
      sel = &all_cores[cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}
      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (optimize_size)
    targetm.rtx_costs = arm_size_rtx_costs;
  else
    targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    arm_pic_register = TARGET_APCS_STACK ? 9 : 10;

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }
  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	{
	  if (streq (all_fpus[i].name, target_fpu_name))
	    {
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
	      break;
	    }
	}
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;
      else
	arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;
      else
	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB)
    error ("can not use -mtp=cp15 with -mthumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM)
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
      if (arm_ld_sched)
        arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
        arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
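/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   is matched against this table and given the type ARM_FT_ISR.  */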

/* Returns the interrupt function type implied by the isr attribute
   ARGUMENT, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
1466
1467/* Return 1 if it is possible to return using a single instruction.
1468   If SIBLING is non-null, this is a test for a return before a sibling
1469   call.  SIBLING is the call insn, so we can examine its register usage.  */
1470
1471int
1472use_return_insn (int iscond, rtx sibling)
1473{
1474  int regno;
1475  unsigned int func_type;
1476  unsigned long saved_int_regs;
1477  unsigned HOST_WIDE_INT stack_adjust;
1478  arm_stack_offsets *offsets;
1479
1480  /* Never use a return instruction before reload has run.  */
1481  if (!reload_completed)
1482    return 0;
1483
1484  func_type = arm_current_func_type ();
1485
1486  /* Naked functions and volatile functions need special
1487     consideration.  */
1488  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
1489    return 0;
1490
1491  /* So do interrupt functions that use the frame pointer.  */
1492  if (IS_INTERRUPT (func_type) && frame_pointer_needed)
1493    return 0;
1494
1495  offsets = arm_get_frame_offsets ();
1496  stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1497
1498  /* As do variadic functions.  */
1499  if (current_function_pretend_args_size
1500      || cfun->machine->uses_anonymous_args
1501      /* Or if the function calls __builtin_eh_return () */
1502      || current_function_calls_eh_return
1503      /* Or if the function calls alloca */
1504      || current_function_calls_alloca
1505      /* Or if there is a stack adjustment.  However, if the stack pointer
1506	 is saved on the stack, we can use a pre-incrementing stack load.  */
1507      || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1508    return 0;
1509
1510  saved_int_regs = arm_compute_save_reg_mask ();
1511
1512  /* Unfortunately, the insn
1513
1514       ldmib sp, {..., sp, ...}
1515
1516     triggers a bug on most SA-110 based devices, such that the stack
1517     pointer won't be correctly restored if the instruction takes a
1518     page fault.  We work around this problem by popping r3 along with
1519     the other registers, since that is never slower than executing
1520     another instruction.
1521
1522     We test for !arm_arch5 here, because code for any architecture
1523     less than this could potentially be run on one of the buggy
1524     chips.  */
1525  if (stack_adjust == 4 && !arm_arch5)
1526    {
1527      /* Validate that r3 is a call-clobbered register (always true in
1528	 the default abi) ...  */
1529      if (!call_used_regs[3])
1530	return 0;
1531
1532      /* ... that it isn't being used for a return value ... */
1533      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1534	return 0;
1535
1536      /* ... or for a tail-call argument ...  */
1537      if (sibling)
1538	{
1539	  gcc_assert (GET_CODE (sibling) == CALL_INSN);
1540
1541	  if (find_regno_fusage (sibling, USE, 3))
1542	    return 0;
1543	}
1544
1545      /* ... and that there are no call-saved registers in r0-r2
1546	 (always true in the default ABI).  */
1547      if (saved_int_regs & 0x7)
1548	return 0;
1549    }
1550
1551  /* Can't be done if interworking with Thumb, and any registers have been
1552     stacked.  */
1553  if (TARGET_INTERWORK && saved_int_regs != 0)
1554    return 0;
1555
1556  /* On StrongARM, conditional returns are expensive if they aren't
1557     taken and multiple registers have been stacked.  */
1558  if (iscond && arm_tune_strongarm)
1559    {
1560      /* Conditional return when just the LR is stored is a simple
1561	 conditional-load instruction, that's not expensive.  */
1562      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1563	return 0;
1564
1565      if (flag_pic
1566	  && arm_pic_register != INVALID_REGNUM
1567	  && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
1568	return 0;
1569    }
1570
1571  /* If there are saved registers but the LR isn't saved, then we need
1572     two instructions for the return.  */
1573  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1574    return 0;
1575
1576  /* Can't be done if any of the FPA regs are pushed,
1577     since this also requires an insn.  */
1578  if (TARGET_HARD_FLOAT && TARGET_FPA)
1579    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1580      if (regs_ever_live[regno] && !call_used_regs[regno])
1581	return 0;
1582
1583  /* Likewise VFP regs.  */
1584  if (TARGET_HARD_FLOAT && TARGET_VFP)
1585    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1586      if (regs_ever_live[regno] && !call_used_regs[regno])
1587	return 0;
1588
1589  if (TARGET_REALLY_IWMMXT)
1590    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1591      if (regs_ever_live[regno] && ! call_used_regs [regno])
1592	return 0;
1593
1594  return 1;
1595}
1596
1597/* Return TRUE if int I is a valid immediate ARM constant.  */
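/* An ARM data-processing immediate is an 8-bit value rotated right by
   an even amount (0, 2, ..., 30).  For example, 0x0000ab00 (0xab
   rotated right by 24 bits) is representable, while 0x0001ab00 is not,
   since its set bits span nine positions, one too many for any single
   rotation.  */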
1598
1599int
1600const_ok_for_arm (HOST_WIDE_INT i)
1601{
1602  int lowbit;
1603
1604  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1605     be all zero, or all one.  */
1606  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1607      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1608	  != ((~(unsigned HOST_WIDE_INT) 0)
1609	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1610    return FALSE;
1611
1612  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1613
1614  /* Fast return for 0 and small values.  We must do this for zero, since
1615     the code below can't handle that one case.  */
1616  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1617    return TRUE;
1618
1619  /* Get the number of trailing zeros, rounded down to the nearest even
1620     number.  */
1621  lowbit = (ffs ((int) i) - 1) & ~1;
1622
1623  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1624    return TRUE;
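  /* The three masks below catch patterns that wrap around the top of
     the word: 0xc000003f, 0xf000000f and 0xfc000003 are 0xff rotated
     right by 2, 4 and 6 bits respectively.  */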
1625  else if (lowbit <= 4
1626	   && ((i & ~0xc000003f) == 0
1627	       || (i & ~0xf000000f) == 0
1628	       || (i & ~0xfc000003) == 0))
1629    return TRUE;
1630
1631  return FALSE;
1632}
1633
1634/* Return true if I is a valid constant for the operation CODE.  */
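/* For example, PLUS accepts -0x104, which is not itself a valid
   immediate, because the addition can be emitted as a subtraction of
   0x104 (0x41 rotated right by 30 bits).  Likewise AND accepts any
   value whose complement is a valid immediate, via BIC.  */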
1635static int
1636const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1637{
1638  if (const_ok_for_arm (i))
1639    return 1;
1640
1641  switch (code)
1642    {
1643    case PLUS:
1644      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1645
1646    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
1647    case XOR:
1648    case IOR:
1649      return 0;
1650
1651    case AND:
1652      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1653
1654    default:
1655      gcc_unreachable ();
1656    }
1657}
1658
1659/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required; it can be any of SET, PLUS,
1661   IOR, AND, XOR, MINUS;
1662   MODE is the mode in which the operation is being performed;
1663   VAL is the integer to operate on;
1664   SOURCE is the other operand (a register, or a null-pointer for SET);
1665   SUBTARGETS means it is safe to create scratch registers if that will
1666   either produce a simpler sequence, or we will want to cse the values.
1667   Return value is the number of insns emitted.  */
1668
1669int
1670arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1671		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1672{
1673  rtx cond;
1674
1675  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1676    cond = COND_EXEC_TEST (PATTERN (insn));
1677  else
1678    cond = NULL_RTX;
1679
1680  if (subtargets || code == SET
1681      || (GET_CODE (target) == REG && GET_CODE (source) == REG
1682	  && REGNO (target) != REGNO (source)))
1683    {
1684      /* After arm_reorg has been called, we can't fix up expensive
1685	 constants by pushing them into memory so we must synthesize
1686	 them in-line, regardless of the cost.  This is only likely to
1687	 be more costly on chips that have load delay slots and we are
1688	 compiling without running the scheduler (so no splitting
1689	 occurred before the final instruction emission).
1690
1691	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1692      */
1693      if (!after_arm_reorg
1694	  && !cond
1695	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1696				1, 0)
1697	      > arm_constant_limit + (code != SET)))
1698	{
1699	  if (code == SET)
1700	    {
	      /* Currently SET is the only monadic value for CODE; all
		 the rest are dyadic.  */
1703	      emit_set_insn (target, GEN_INT (val));
1704	      return 1;
1705	    }
1706	  else
1707	    {
1708	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1709
1710	      emit_set_insn (temp, GEN_INT (val));
1711	      /* For MINUS, the value is subtracted from, since we never
1712		 have subtraction of a constant.  */
1713	      if (code == MINUS)
1714		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1715	      else
1716		emit_set_insn (target,
1717			       gen_rtx_fmt_ee (code, mode, source, temp));
1718	      return 2;
1719	    }
1720	}
1721    }
1722
1723  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1724			   1);
1725}
1726
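/* Count how many insns the loop at the bottom of arm_gen_constant
   would need to synthesize REMAINDER, scanning two bits at a time
   starting at bit position I and clearing up to eight bits of the
   constant with each insn.  */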
1727static int
1728count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1729{
1730  HOST_WIDE_INT temp1;
1731  int num_insns = 0;
1732  do
1733    {
1734      int end;
1735
1736      if (i <= 0)
1737	i += 32;
1738      if (remainder & (3 << (i - 2)))
1739	{
1740	  end = i - 8;
1741	  if (end < 0)
1742	    end += 32;
1743	  temp1 = remainder & ((0x0ff << end)
1744				    | ((i < end) ? (0xff >> (32 - end)) : 0));
1745	  remainder &= ~temp1;
1746	  num_insns++;
1747	  i -= 6;
1748	}
1749      i -= 2;
1750    } while (remainder);
1751  return num_insns;
1752}
1753
1754/* Emit an instruction with the indicated PATTERN.  If COND is
1755   non-NULL, conditionalize the execution of the instruction on COND
1756   being true.  */
1757
1758static void
1759emit_constant_insn (rtx cond, rtx pattern)
1760{
1761  if (cond)
1762    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1763  emit_insn (pattern);
1764}
1765
1766/* As above, but extra parameter GENERATE which, if clear, suppresses
1767   RTL generation.  */
1768
1769static int
1770arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1771		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1772		  int generate)
1773{
1774  int can_invert = 0;
1775  int can_negate = 0;
1776  int can_negate_initial = 0;
1777  int can_shift = 0;
1778  int i;
1779  int num_bits_set = 0;
1780  int set_sign_bit_copies = 0;
1781  int clear_sign_bit_copies = 0;
1782  int clear_zero_bit_copies = 0;
1783  int set_zero_bit_copies = 0;
1784  int insns = 0;
1785  unsigned HOST_WIDE_INT temp1, temp2;
1786  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1787
1788  /* Find out which operations are safe for a given CODE.  Also do a quick
1789     check for degenerate cases; these can occur when DImode operations
1790     are split.  */
1791  switch (code)
1792    {
1793    case SET:
1794      can_invert = 1;
1795      can_shift = 1;
1796      can_negate = 1;
1797      break;
1798
1799    case PLUS:
1800      can_negate = 1;
1801      can_negate_initial = 1;
1802      break;
1803
1804    case IOR:
1805      if (remainder == 0xffffffff)
1806	{
1807	  if (generate)
1808	    emit_constant_insn (cond,
1809				gen_rtx_SET (VOIDmode, target,
1810					     GEN_INT (ARM_SIGN_EXTEND (val))));
1811	  return 1;
1812	}
1813      if (remainder == 0)
1814	{
1815	  if (reload_completed && rtx_equal_p (target, source))
1816	    return 0;
1817	  if (generate)
1818	    emit_constant_insn (cond,
1819				gen_rtx_SET (VOIDmode, target, source));
1820	  return 1;
1821	}
1822      break;
1823
1824    case AND:
1825      if (remainder == 0)
1826	{
1827	  if (generate)
1828	    emit_constant_insn (cond,
1829				gen_rtx_SET (VOIDmode, target, const0_rtx));
1830	  return 1;
1831	}
1832      if (remainder == 0xffffffff)
1833	{
1834	  if (reload_completed && rtx_equal_p (target, source))
1835	    return 0;
1836	  if (generate)
1837	    emit_constant_insn (cond,
1838				gen_rtx_SET (VOIDmode, target, source));
1839	  return 1;
1840	}
1841      can_invert = 1;
1842      break;
1843
1844    case XOR:
1845      if (remainder == 0)
1846	{
1847	  if (reload_completed && rtx_equal_p (target, source))
1848	    return 0;
1849	  if (generate)
1850	    emit_constant_insn (cond,
1851				gen_rtx_SET (VOIDmode, target, source));
1852	  return 1;
1853	}
1854
1855      /* We don't know how to handle other cases yet.  */
1856      gcc_assert (remainder == 0xffffffff);
1857
1858      if (generate)
1859	emit_constant_insn (cond,
1860			    gen_rtx_SET (VOIDmode, target,
1861					 gen_rtx_NOT (mode, source)));
1862      return 1;
1863
1864    case MINUS:
1865      /* We treat MINUS as (val - source), since (source - val) is always
1866	 passed as (source + (-val)).  */
1867      if (remainder == 0)
1868	{
1869	  if (generate)
1870	    emit_constant_insn (cond,
1871				gen_rtx_SET (VOIDmode, target,
1872					     gen_rtx_NEG (mode, source)));
1873	  return 1;
1874	}
1875      if (const_ok_for_arm (val))
1876	{
1877	  if (generate)
1878	    emit_constant_insn (cond,
1879				gen_rtx_SET (VOIDmode, target,
1880					     gen_rtx_MINUS (mode, GEN_INT (val),
1881							    source)));
1882	  return 1;
1883	}
1884      can_negate = 1;
1885
1886      break;
1887
1888    default:
1889      gcc_unreachable ();
1890    }
1891
1892  /* If we can do it in one insn get out quickly.  */
1893  if (const_ok_for_arm (val)
1894      || (can_negate_initial && const_ok_for_arm (-val))
1895      || (can_invert && const_ok_for_arm (~val)))
1896    {
1897      if (generate)
1898	emit_constant_insn (cond,
1899			    gen_rtx_SET (VOIDmode, target,
1900					 (source
1901					  ? gen_rtx_fmt_ee (code, mode, source,
1902							    GEN_INT (val))
1903					  : GEN_INT (val))));
1904      return 1;
1905    }
1906
1907  /* Calculate a few attributes that may be useful for specific
1908     optimizations.  */
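  /* For example, remainder == 0x000ffc00 (bits 10 to 19 set) gives
     clear_sign_bit_copies == 12, set_sign_bit_copies == 0,
     clear_zero_bit_copies == 10 and set_zero_bit_copies == 0.  */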
1909  for (i = 31; i >= 0; i--)
1910    {
1911      if ((remainder & (1 << i)) == 0)
1912	clear_sign_bit_copies++;
1913      else
1914	break;
1915    }
1916
1917  for (i = 31; i >= 0; i--)
1918    {
1919      if ((remainder & (1 << i)) != 0)
1920	set_sign_bit_copies++;
1921      else
1922	break;
1923    }
1924
1925  for (i = 0; i <= 31; i++)
1926    {
1927      if ((remainder & (1 << i)) == 0)
1928	clear_zero_bit_copies++;
1929      else
1930	break;
1931    }
1932
1933  for (i = 0; i <= 31; i++)
1934    {
1935      if ((remainder & (1 << i)) != 0)
1936	set_zero_bit_copies++;
1937      else
1938	break;
1939    }
1940
1941  switch (code)
1942    {
1943    case SET:
1944      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
1946	 may well merge into a subsequent insn.  */
1947      if (set_sign_bit_copies > 1)
1948	{
1949	  if (const_ok_for_arm
1950	      (temp1 = ARM_SIGN_EXTEND (remainder
1951					<< (set_sign_bit_copies - 1))))
1952	    {
1953	      if (generate)
1954		{
1955		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1956		  emit_constant_insn (cond,
1957				      gen_rtx_SET (VOIDmode, new_src,
1958						   GEN_INT (temp1)));
1959		  emit_constant_insn (cond,
1960				      gen_ashrsi3 (target, new_src,
1961						   GEN_INT (set_sign_bit_copies - 1)));
1962		}
1963	      return 2;
1964	    }
	  /* For an inverted constant, we will need to set the low bits;
	     these will be shifted out of harm's way.  */
1967	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1968	  if (const_ok_for_arm (~temp1))
1969	    {
1970	      if (generate)
1971		{
1972		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1973		  emit_constant_insn (cond,
1974				      gen_rtx_SET (VOIDmode, new_src,
1975						   GEN_INT (temp1)));
1976		  emit_constant_insn (cond,
1977				      gen_ashrsi3 (target, new_src,
1978						   GEN_INT (set_sign_bit_copies - 1)));
1979		}
1980	      return 2;
1981	    }
1982	}
1983
1984      /* See if we can calculate the value as the difference between two
1985	 valid immediates.  */
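      /* For example, 0x00fffff0 is not a valid immediate, but it can
	 be loaded as 0x01000000 - 0x00000010, both of which are.  */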
1986      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
1987	{
1988	  int topshift = clear_sign_bit_copies & ~1;
1989
1990	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
1991				   & (0xff000000 >> topshift));
1992
1993	  /* If temp1 is zero, then that means the 9 most significant
1994	     bits of remainder were 1 and we've caused it to overflow.
1995	     When topshift is 0 we don't need to do anything since we
1996	     can borrow from 'bit 32'.  */
1997	  if (temp1 == 0 && topshift != 0)
1998	    temp1 = 0x80000000 >> (topshift - 1);
1999
2000	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2001
2002	  if (const_ok_for_arm (temp2))
2003	    {
2004	      if (generate)
2005		{
2006		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2007		  emit_constant_insn (cond,
2008				      gen_rtx_SET (VOIDmode, new_src,
2009						   GEN_INT (temp1)));
2010		  emit_constant_insn (cond,
2011				      gen_addsi3 (target, new_src,
2012						  GEN_INT (-temp2)));
2013		}
2014
2015	      return 2;
2016	    }
2017	}
2018
2019      /* See if we can generate this by setting the bottom (or the top)
2020	 16 bits, and then shifting these into the other half of the
2021	 word.  We only look for the simplest cases, to do more would cost
2022	 too much.  Be careful, however, not to generate this when the
2023	 alternative would take fewer insns.  */
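      /* For example, 0x01010808 is 0x0808 | (0x0808 << 13), so we can
	 synthesize 0x0808 once and then OR it with a shifted copy of
	 itself.  */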
2024      if (val & 0xffff0000)
2025	{
2026	  temp1 = remainder & 0xffff0000;
2027	  temp2 = remainder & 0x0000ffff;
2028
2029	  /* Overlaps outside this range are best done using other methods.  */
2030	  for (i = 9; i < 24; i++)
2031	    {
2032	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2033		  && !const_ok_for_arm (temp2))
2034		{
2035		  rtx new_src = (subtargets
2036				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2037				 : target);
2038		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2039					    source, subtargets, generate);
2040		  source = new_src;
2041		  if (generate)
2042		    emit_constant_insn
2043		      (cond,
2044		       gen_rtx_SET
2045		       (VOIDmode, target,
2046			gen_rtx_IOR (mode,
2047				     gen_rtx_ASHIFT (mode, source,
2048						     GEN_INT (i)),
2049				     source)));
2050		  return insns + 1;
2051		}
2052	    }
2053
2054	  /* Don't duplicate cases already considered.  */
2055	  for (i = 17; i < 24; i++)
2056	    {
2057	      if (((temp1 | (temp1 >> i)) == remainder)
2058		  && !const_ok_for_arm (temp1))
2059		{
2060		  rtx new_src = (subtargets
2061				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2062				 : target);
2063		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2064					    source, subtargets, generate);
2065		  source = new_src;
2066		  if (generate)
2067		    emit_constant_insn
2068		      (cond,
2069		       gen_rtx_SET (VOIDmode, target,
2070				    gen_rtx_IOR
2071				    (mode,
2072				     gen_rtx_LSHIFTRT (mode, source,
2073						       GEN_INT (i)),
2074				     source)));
2075		  return insns + 1;
2076		}
2077	    }
2078	}
2079      break;
2080
2081    case IOR:
2082    case XOR:
2083      /* If we have IOR or XOR, and the constant can be loaded in a
2084	 single instruction, and we can find a temporary to put it in,
2085	 then this can be done in two instructions instead of 3-4.  */
2086      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
2088	  || (reload_completed && !reg_mentioned_p (target, source)))
2089	{
2090	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2091	    {
2092	      if (generate)
2093		{
2094		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2095
2096		  emit_constant_insn (cond,
2097				      gen_rtx_SET (VOIDmode, sub,
2098						   GEN_INT (val)));
2099		  emit_constant_insn (cond,
2100				      gen_rtx_SET (VOIDmode, target,
2101						   gen_rtx_fmt_ee (code, mode,
2102								   source, sub)));
2103		}
2104	      return 2;
2105	    }
2106	}
2107
2108      if (code == XOR)
2109	break;
2110
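      /* An IOR with a run of ones at the top of the word can be done
	 with two MVNs: the first inverts the source shifted left, the
	 second shifts back down and inverts again, so the vacated top
	 bits come back as ones.  */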
2111      if (set_sign_bit_copies > 8
2112	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2113	{
2114	  if (generate)
2115	    {
2116	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2117	      rtx shift = GEN_INT (set_sign_bit_copies);
2118
2119	      emit_constant_insn
2120		(cond,
2121		 gen_rtx_SET (VOIDmode, sub,
2122			      gen_rtx_NOT (mode,
2123					   gen_rtx_ASHIFT (mode,
2124							   source,
2125							   shift))));
2126	      emit_constant_insn
2127		(cond,
2128		 gen_rtx_SET (VOIDmode, target,
2129			      gen_rtx_NOT (mode,
2130					   gen_rtx_LSHIFTRT (mode, sub,
2131							     shift))));
2132	    }
2133	  return 2;
2134	}
2135
2136      if (set_zero_bit_copies > 8
2137	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2138	{
2139	  if (generate)
2140	    {
2141	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2142	      rtx shift = GEN_INT (set_zero_bit_copies);
2143
2144	      emit_constant_insn
2145		(cond,
2146		 gen_rtx_SET (VOIDmode, sub,
2147			      gen_rtx_NOT (mode,
2148					   gen_rtx_LSHIFTRT (mode,
2149							     source,
2150							     shift))));
2151	      emit_constant_insn
2152		(cond,
2153		 gen_rtx_SET (VOIDmode, target,
2154			      gen_rtx_NOT (mode,
2155					   gen_rtx_ASHIFT (mode, sub,
2156							   shift))));
2157	    }
2158	  return 2;
2159	}
2160
2161      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2162	{
2163	  if (generate)
2164	    {
2165	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2166	      emit_constant_insn (cond,
2167				  gen_rtx_SET (VOIDmode, sub,
2168					       gen_rtx_NOT (mode, source)));
2169	      source = sub;
2170	      if (subtargets)
2171		sub = gen_reg_rtx (mode);
2172	      emit_constant_insn (cond,
2173				  gen_rtx_SET (VOIDmode, sub,
2174					       gen_rtx_AND (mode, source,
2175							    GEN_INT (temp1))));
2176	      emit_constant_insn (cond,
2177				  gen_rtx_SET (VOIDmode, target,
2178					       gen_rtx_NOT (mode, sub)));
2179	    }
2180	  return 3;
2181	}
2182      break;
2183
2184    case AND:
      /* See if two shifts will do 2 or more insns' worth of work.  */
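      /* An AND whose mask is a block of low-order ones can be done as
	 lsl #N followed by lsr #N, which clears the top N bits; any
	 other bits that must be cleared are handled by a recursive AND
	 first.  */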
2186      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2187	{
2188	  HOST_WIDE_INT shift_mask = ((0xffffffff
2189				       << (32 - clear_sign_bit_copies))
2190				      & 0xffffffff);
2191
2192	  if ((remainder | shift_mask) != 0xffffffff)
2193	    {
2194	      if (generate)
2195		{
2196		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2197		  insns = arm_gen_constant (AND, mode, cond,
2198					    remainder | shift_mask,
2199					    new_src, source, subtargets, 1);
2200		  source = new_src;
2201		}
2202	      else
2203		{
2204		  rtx targ = subtargets ? NULL_RTX : target;
2205		  insns = arm_gen_constant (AND, mode, cond,
2206					    remainder | shift_mask,
2207					    targ, source, subtargets, 0);
2208		}
2209	    }
2210
2211	  if (generate)
2212	    {
2213	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2214	      rtx shift = GEN_INT (clear_sign_bit_copies);
2215
2216	      emit_insn (gen_ashlsi3 (new_src, source, shift));
2217	      emit_insn (gen_lshrsi3 (target, new_src, shift));
2218	    }
2219
2220	  return insns + 2;
2221	}
2222
2223      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2224	{
2225	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2226
2227	  if ((remainder | shift_mask) != 0xffffffff)
2228	    {
2229	      if (generate)
2230		{
2231		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2232
2233		  insns = arm_gen_constant (AND, mode, cond,
2234					    remainder | shift_mask,
2235					    new_src, source, subtargets, 1);
2236		  source = new_src;
2237		}
2238	      else
2239		{
2240		  rtx targ = subtargets ? NULL_RTX : target;
2241
2242		  insns = arm_gen_constant (AND, mode, cond,
2243					    remainder | shift_mask,
2244					    targ, source, subtargets, 0);
2245		}
2246	    }
2247
2248	  if (generate)
2249	    {
2250	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2251	      rtx shift = GEN_INT (clear_zero_bit_copies);
2252
2253	      emit_insn (gen_lshrsi3 (new_src, source, shift));
2254	      emit_insn (gen_ashlsi3 (target, new_src, shift));
2255	    }
2256
2257	  return insns + 2;
2258	}
2259
2260      break;
2261
2262    default:
2263      break;
2264    }
2265
2266  for (i = 0; i < 32; i++)
2267    if (remainder & (1 << i))
2268      num_bits_set++;
2269
2270  if (code == AND || (can_invert && num_bits_set > 16))
2271    remainder = (~remainder) & 0xffffffff;
2272  else if (code == PLUS && num_bits_set > 16)
2273    remainder = (-remainder) & 0xffffffff;
2274  else
2275    {
2276      can_invert = 0;
2277      can_negate = 0;
2278    }
2279
  /* Now try to find a way of doing the job in either two or three
     instructions.
     We start by looking for the largest block of zeros that is aligned on
     a 2-bit boundary; we then fill up the temps, wrapping around to the
     top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.  */
2286  {
2287    int best_start = 0;
2288    int best_consecutive_zeros = 0;
2289
2290    for (i = 0; i < 32; i += 2)
2291      {
2292	int consecutive_zeros = 0;
2293
2294	if (!(remainder & (3 << i)))
2295	  {
2296	    while ((i < 32) && !(remainder & (3 << i)))
2297	      {
2298		consecutive_zeros += 2;
2299		i += 2;
2300	      }
2301	    if (consecutive_zeros > best_consecutive_zeros)
2302	      {
2303		best_consecutive_zeros = consecutive_zeros;
2304		best_start = i - consecutive_zeros;
2305	      }
2306	    i -= 2;
2307	  }
2308      }
2309
2310    /* So long as it won't require any more insns to do so, it's
2311       desirable to emit a small constant (in bits 0...9) in the last
2312       insn.  This way there is more chance that it can be combined with
2313       a later addressing insn to form a pre-indexed load or store
2314       operation.  Consider:
2315
2316	       *((volatile int *)0xe0000100) = 1;
2317	       *((volatile int *)0xe0000110) = 2;
2318
2319       We want this to wind up as:
2320
2321		mov rA, #0xe0000000
2322		mov rB, #1
2323		str rB, [rA, #0x100]
2324		mov rB, #2
2325		str rB, [rA, #0x110]
2326
2327       rather than having to synthesize both large constants from scratch.
2328
2329       Therefore, we calculate how many insns would be required to emit
2330       the constant starting from `best_start', and also starting from
2331       zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
2332       yield a shorter sequence, we may as well use zero.  */
2333    if (best_start != 0
2334	&& ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2335	&& (count_insns_for_constant (remainder, 0) <=
2336	    count_insns_for_constant (remainder, best_start)))
2337      best_start = 0;
2338
2339    /* Now start emitting the insns.  */
2340    i = best_start;
2341    do
2342      {
2343	int end;
2344
2345	if (i <= 0)
2346	  i += 32;
2347	if (remainder & (3 << (i - 2)))
2348	  {
2349	    end = i - 8;
2350	    if (end < 0)
2351	      end += 32;
2352	    temp1 = remainder & ((0x0ff << end)
2353				 | ((i < end) ? (0xff >> (32 - end)) : 0));
2354	    remainder &= ~temp1;
2355
2356	    if (generate)
2357	      {
2358		rtx new_src, temp1_rtx;
2359
2360		if (code == SET || code == MINUS)
2361		  {
2362		    new_src = (subtargets ? gen_reg_rtx (mode) : target);
2363		    if (can_invert && code != MINUS)
2364		      temp1 = ~temp1;
2365		  }
2366		else
2367		  {
2368		    if (remainder && subtargets)
2369		      new_src = gen_reg_rtx (mode);
2370		    else
2371		      new_src = target;
2372		    if (can_invert)
2373		      temp1 = ~temp1;
2374		    else if (can_negate)
2375		      temp1 = -temp1;
2376		  }
2377
2378		temp1 = trunc_int_for_mode (temp1, mode);
2379		temp1_rtx = GEN_INT (temp1);
2380
2381		if (code == SET)
2382		  ;
2383		else if (code == MINUS)
2384		  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2385		else
2386		  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2387
2388		emit_constant_insn (cond,
2389				    gen_rtx_SET (VOIDmode, new_src,
2390						 temp1_rtx));
2391		source = new_src;
2392	      }
2393
2394	    if (code == SET)
2395	      {
2396		can_invert = 0;
2397		code = PLUS;
2398	      }
2399	    else if (code == MINUS)
2400	      code = PLUS;
2401
2402	    insns++;
2403	    i -= 6;
2404	  }
2405	i -= 2;
2406      }
2407    while (remainder);
2408  }
2409
2410  return insns;
2411}
2412
2413/* Canonicalize a comparison so that we are more likely to recognize it.
2414   This can be done for a few constant compares, where we can make the
2415   immediate value easier to load.  */
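/* For example, (x > 0xfff) becomes (x >= 0x1000); 0xfff is not a
   valid immediate, but 0x1000 is.  */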
2416
2417enum rtx_code
2418arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2419			     rtx * op1)
2420{
2421  unsigned HOST_WIDE_INT i = INTVAL (*op1);
2422  unsigned HOST_WIDE_INT maxval;
  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;
2424
2425  switch (code)
2426    {
2427    case EQ:
2428    case NE:
2429      return code;
2430
2431    case GT:
2432    case LE:
2433      if (i != maxval
2434	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2435	{
2436	  *op1 = GEN_INT (i + 1);
2437	  return code == GT ? GE : LT;
2438	}
2439      break;
2440
2441    case GE:
2442    case LT:
2443      if (i != ~maxval
2444	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2445	{
2446	  *op1 = GEN_INT (i - 1);
2447	  return code == GE ? GT : LE;
2448	}
2449      break;
2450
2451    case GTU:
2452    case LEU:
2453      if (i != ~((unsigned HOST_WIDE_INT) 0)
2454	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2455	{
2456	  *op1 = GEN_INT (i + 1);
2457	  return code == GTU ? GEU : LTU;
2458	}
2459      break;
2460
2461    case GEU:
2462    case LTU:
2463      if (i != 0
2464	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2465	{
2466	  *op1 = GEN_INT (i - 1);
2467	  return code == GEU ? GTU : LEU;
2468	}
2469      break;
2470
2471    default:
2472      gcc_unreachable ();
2473    }
2474
2475  return code;
2476}
2477
2478
2479/* Define how to find the value returned by a function.  */
2480
2481rtx
arm_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2483{
2484  enum machine_mode mode;
2485  int unsignedp ATTRIBUTE_UNUSED;
2486  rtx r ATTRIBUTE_UNUSED;
2487
2488  mode = TYPE_MODE (type);
2489  /* Promote integer types.  */
2490  if (INTEGRAL_TYPE_P (type))
2491    PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2492
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
2495  if (arm_return_in_msb (type))
2496    {
2497      HOST_WIDE_INT size = int_size_in_bytes (type);
2498      if (size % UNITS_PER_WORD != 0)
2499	{
2500	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2501	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2502	}
2503    }
2504
  return LIBCALL_VALUE (mode);
2506}
2507
2508/* Determine the amount of memory needed to store the possible return
2509   registers of an untyped call.  */
2510int
2511arm_apply_result_size (void)
2512{
2513  int size = 16;
2514
2515  if (TARGET_ARM)
2516    {
2517      if (TARGET_HARD_FLOAT_ABI)
2518	{
2519	  if (TARGET_FPA)
2520	    size += 12;
2521	  if (TARGET_MAVERICK)
2522	    size += 8;
2523	}
2524      if (TARGET_IWMMXT_ABI)
2525	size += 8;
2526    }
2527
2528  return size;
2529}
2530
2531/* Decide whether a type should be returned in memory (true)
2532   or in a register (false).  This is called by the macro
2533   RETURN_IN_MEMORY.  */
2534int
2535arm_return_in_memory (tree type)
2536{
2537  HOST_WIDE_INT size;
2538
  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != VECTOR_TYPE
      && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2542    /* All simple types are returned in registers.
2543       For AAPCS, complex types are treated the same as aggregates.  */
2544    return 0;
2545
2546  size = int_size_in_bytes (type);
2547
2548  if (arm_abi != ARM_ABI_APCS)
2549    {
2550      /* ATPCS and later return aggregate types in memory only if they are
2551	 larger than a word (or are variable size).  */
2552      return (size < 0 || size > UNITS_PER_WORD);
2553    }
2554
2555  /* To maximize backwards compatibility with previous versions of gcc,
2556     return vectors up to 4 words in registers.  */
2557  if (TREE_CODE (type) == VECTOR_TYPE)
2558    return (size < 0 || size > (4 * UNITS_PER_WORD));
2559
2560  /* For the arm-wince targets we choose to be compatible with Microsoft's
2561     ARM and Thumb compilers, which always return aggregates in memory.  */
2562#ifndef ARM_WINCE
2563  /* All structures/unions bigger than one word are returned in memory.
2564     Also catch the case where int_size_in_bytes returns -1.  In this case
2565     the aggregate is either huge or of variable size, and in either case
2566     we will want to return it via memory and not in a register.  */
2567  if (size < 0 || size > UNITS_PER_WORD)
2568    return 1;
2569
2570  if (TREE_CODE (type) == RECORD_TYPE)
2571    {
2572      tree field;
2573
2574      /* For a struct the APCS says that we only return in a register
2575	 if the type is 'integer like' and every addressable element
2576	 has an offset of zero.  For practical purposes this means
2577	 that the structure can have at most one non bit-field element
2578	 and that this element must be the first one in the structure.  */
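      /* For example, struct { int i; } can be returned in a register,
	 whereas struct { float f; } or struct { char a; char b; } (two
	 addressable fields) must be returned in memory.  */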
2579
2580      /* Find the first field, ignoring non FIELD_DECL things which will
2581	 have been created by C++.  */
2582      for (field = TYPE_FIELDS (type);
2583	   field && TREE_CODE (field) != FIELD_DECL;
2584	   field = TREE_CHAIN (field))
2585	continue;
2586
2587      if (field == NULL)
2588	return 0; /* An empty structure.  Allowed by an extension to ANSI C.  */
2589
2590      /* Check that the first field is valid for returning in a register.  */
2591
      /* ... Floats are not allowed.  */
2593      if (FLOAT_TYPE_P (TREE_TYPE (field)))
2594	return 1;
2595
2596      /* ... Aggregates that are not themselves valid for returning in
2597	 a register are not allowed.  */
2598      if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2599	return 1;
2600
2601      /* Now check the remaining fields, if any.  Only bitfields are allowed,
2602	 since they are not addressable.  */
2603      for (field = TREE_CHAIN (field);
2604	   field;
2605	   field = TREE_CHAIN (field))
2606	{
2607	  if (TREE_CODE (field) != FIELD_DECL)
2608	    continue;
2609
2610	  if (!DECL_BIT_FIELD_TYPE (field))
2611	    return 1;
2612	}
2613
2614      return 0;
2615    }
2616
2617  if (TREE_CODE (type) == UNION_TYPE)
2618    {
2619      tree field;
2620
2621      /* Unions can be returned in registers if every element is
2622	 integral, or can be returned in an integer register.  */
2623      for (field = TYPE_FIELDS (type);
2624	   field;
2625	   field = TREE_CHAIN (field))
2626	{
2627	  if (TREE_CODE (field) != FIELD_DECL)
2628	    continue;
2629
2630	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
2631	    return 1;
2632
2633	  if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2634	    return 1;
2635	}
2636
2637      return 0;
2638    }
2639#endif /* not ARM_WINCE */
2640
2641  /* Return all other types in memory.  */
2642  return 1;
2643}
2644
2645/* Indicate whether or not words of a double are in big-endian order.  */
2646
2647int
2648arm_float_words_big_endian (void)
2649{
2650  if (TARGET_MAVERICK)
2651    return 0;
2652
  /* For FPA, float words are always big-endian.  For VFP, float words
2654     follow the memory system mode.  */
2655
2656  if (TARGET_FPA)
2657    {
2658      return 1;
2659    }
2660
2661  if (TARGET_VFP)
2662    return (TARGET_BIG_END ? 1 : 0);
2663
2664  return 1;
2665}
2666
2667/* Initialize a variable CUM of type CUMULATIVE_ARGS
2668   for a call to a function whose data type is FNTYPE.
2669   For a library call, FNTYPE is NULL.  */
2670void
2671arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2672			  rtx libname  ATTRIBUTE_UNUSED,
2673			  tree fndecl ATTRIBUTE_UNUSED)
2674{
2675  /* On the ARM, the offset starts at 0.  */
2676  pcum->nregs = 0;
2677  pcum->iwmmxt_nregs = 0;
2678  pcum->can_split = true;
2679
2680  pcum->call_cookie = CALL_NORMAL;
2681
2682  if (TARGET_LONG_CALLS)
2683    pcum->call_cookie = CALL_LONG;
2684
2685  /* Check for long call/short call attributes.  The attributes
2686     override any command line option.  */
2687  if (fntype)
2688    {
2689      if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2690	pcum->call_cookie = CALL_SHORT;
2691      else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2692	pcum->call_cookie = CALL_LONG;
2693    }
2694
2695  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
2697  pcum->named_count = 0;
2698  pcum->nargs = 0;
2699
2700  if (TARGET_REALLY_IWMMXT && fntype)
2701    {
2702      tree fn_arg;
2703
2704      for (fn_arg = TYPE_ARG_TYPES (fntype);
2705	   fn_arg;
2706	   fn_arg = TREE_CHAIN (fn_arg))
2707	pcum->named_count += 1;
2708
2709      if (! pcum->named_count)
2710	pcum->named_count = INT_MAX;
2711    }
2712}
2713
2714
2715/* Return true if mode/type need doubleword alignment.  */
2716bool
2717arm_needs_doubleword_align (enum machine_mode mode, tree type)
2718{
2719  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2720	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2721}
2722
2723
2724/* Determine where to put an argument to a function.
2725   Value is zero to push the argument on the stack,
2726   or a hard register in which to store the argument.
2727
2728   MODE is the argument's machine mode.
2729   TYPE is the data type of the argument (as a tree).
2730    This is null for libcalls where that information may
2731    not be available.
2732   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2733    the preceding args and about the function being called.
2734   NAMED is nonzero if this argument is a named parameter
2735    (otherwise it is an extra parameter matching an ellipsis).  */
2736
2737rtx
2738arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2739		  tree type, int named)
2740{
2741  int nregs;
2742
2743  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
2745  if (TARGET_IWMMXT_ABI
2746      && arm_vector_mode_supported_p (mode)
2747      && pcum->named_count > pcum->nargs + 1)
2748    {
2749      if (pcum->iwmmxt_nregs <= 9)
2750	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2751      else
2752	{
2753	  pcum->can_split = false;
2754	  return NULL_RTX;
2755	}
2756    }
2757
2758  /* Put doubleword aligned quantities in even register pairs.  */
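  /* For example, when ARM_DOUBLEWORD_ALIGN is in force, a DImode
     argument that follows a single int in r0 skips r1 and is passed
     in r2 and r3.  */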
2759  if (pcum->nregs & 1
2760      && ARM_DOUBLEWORD_ALIGN
2761      && arm_needs_doubleword_align (mode, type))
2762    pcum->nregs++;
2763
2764  if (mode == VOIDmode)
2765    /* Compute operand 2 of the call insn.  */
2766    return GEN_INT (pcum->call_cookie);
2767
2768  /* Only allow splitting an arg between regs and memory if all preceding
2769     args were allocated to regs.  For args passed by reference we only count
2770     the reference pointer.  */
2771  if (pcum->can_split)
2772    nregs = 1;
2773  else
2774    nregs = ARM_NUM_REGS2 (mode, type);
2775
2776  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2777    return NULL_RTX;
2778
2779  return gen_rtx_REG (mode, pcum->nregs);
2780}
2781
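/* Return the number of bytes of an argument that are passed in
   registers when it is split between registers and the stack.  For
   example, a DImode argument arriving at r3 has one word in r3 and
   one on the stack, so we return UNITS_PER_WORD.  */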
2782static int
2783arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2784		       tree type, bool named ATTRIBUTE_UNUSED)
2785{
2786  int nregs = pcum->nregs;
2787
2788  if (arm_vector_mode_supported_p (mode))
2789    return 0;
2790
2791  if (NUM_ARG_REGS > nregs
2792      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2793      && pcum->can_split)
2794    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2795
2796  return 0;
2797}
2798
2799/* Variable sized types are passed by reference.  This is a GCC
2800   extension to the ARM ABI.  */
2801
2802static bool
2803arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2804		       enum machine_mode mode ATTRIBUTE_UNUSED,
2805		       tree type, bool named ATTRIBUTE_UNUSED)
2806{
2807  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2808}
2809
2810/* Encode the current state of the #pragma [no_]long_calls.  */
2811typedef enum
2812{
2813  OFF,		/* No #pragma [no_]long_calls is in effect.  */
2814  LONG,		/* #pragma long_calls is in effect.  */
2815  SHORT		/* #pragma no_long_calls is in effect.  */
2816} arm_pragma_enum;
2817
2818static arm_pragma_enum arm_pragma_long_calls = OFF;
2819
2820void
2821arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2822{
2823  arm_pragma_long_calls = LONG;
2824}
2825
2826void
2827arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2828{
2829  arm_pragma_long_calls = SHORT;
2830}
2831
2832void
2833arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2834{
2835  arm_pragma_long_calls = OFF;
2836}
2837
2838/* Table of machine attributes.  */
2839const struct attribute_spec arm_attribute_table[] =
2840{
2841  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2842  /* Function calls made to this symbol must be done indirectly, because
2843     it may lie outside of the 26 bit addressing range of a normal function
2844     call.  */
2845  { "long_call",    0, 0, false, true,  true,  NULL },
2846  /* Whereas these functions are always known to reside within the 26 bit
2847     addressing range.  */
2848  { "short_call",   0, 0, false, true,  true,  NULL },
2849  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
2850  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
2851  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
2852  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
2853#ifdef ARM_PE
2854  /* ARM/PE has three new attributes:
2855     interfacearm - ?
2856     dllexport - for exporting a function/variable that will live in a dll
2857     dllimport - for importing a function/variable from a dll
2858
2859     Microsoft allows multiple declspecs in one __declspec, separating
2860     them with spaces.  We do NOT support this.  Instead, use __declspec
2861     multiple times.
2862  */
2863  { "dllimport",    0, 0, true,  false, false, NULL },
2864  { "dllexport",    0, 0, true,  false, false, NULL },
2865  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
2866#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2867  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
2868  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
2869  { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute },
2870#endif
2871  { NULL,           0, 0, false, false, false, NULL }
2872};
2873
2874/* Handle an attribute requiring a FUNCTION_DECL;
2875   arguments as in struct attribute_spec.handler.  */
2876static tree
2877arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2878			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2879{
2880  if (TREE_CODE (*node) != FUNCTION_DECL)
2881    {
2882      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2883	       IDENTIFIER_POINTER (name));
2884      *no_add_attrs = true;
2885    }
2886
2887  return NULL_TREE;
2888}
2889
2890/* Handle an "interrupt" or "isr" attribute;
2891   arguments as in struct attribute_spec.handler.  */
2892static tree
2893arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2894			  bool *no_add_attrs)
2895{
2896  if (DECL_P (*node))
2897    {
2898      if (TREE_CODE (*node) != FUNCTION_DECL)
2899	{
2900	  warning (OPT_Wattributes, "%qs attribute only applies to functions",
2901		   IDENTIFIER_POINTER (name));
2902	  *no_add_attrs = true;
2903	}
2904      /* FIXME: the argument if any is checked for type attributes;
2905	 should it be checked for decl ones?  */
2906    }
2907  else
2908    {
2909      if (TREE_CODE (*node) == FUNCTION_TYPE
2910	  || TREE_CODE (*node) == METHOD_TYPE)
2911	{
2912	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2913	    {
2914	      warning (OPT_Wattributes, "%qs attribute ignored",
2915		       IDENTIFIER_POINTER (name));
2916	      *no_add_attrs = true;
2917	    }
2918	}
2919      else if (TREE_CODE (*node) == POINTER_TYPE
2920	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2921		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2922	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
2923	{
2924	  *node = build_variant_type_copy (*node);
2925	  TREE_TYPE (*node) = build_type_attribute_variant
2926	    (TREE_TYPE (*node),
2927	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2928	  *no_add_attrs = true;
2929	}
2930      else
2931	{
2932	  /* Possibly pass this attribute on from the type to a decl.  */
2933	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
2934		       | (int) ATTR_FLAG_FUNCTION_NEXT
2935		       | (int) ATTR_FLAG_ARRAY_NEXT))
2936	    {
2937	      *no_add_attrs = true;
2938	      return tree_cons (name, args, NULL_TREE);
2939	    }
2940	  else
2941	    {
2942	      warning (OPT_Wattributes, "%qs attribute ignored",
2943		       IDENTIFIER_POINTER (name));
2944	    }
2945	}
2946    }
2947
2948  return NULL_TREE;
2949}
2950
2951#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2952/* Handle the "notshared" attribute.  This attribute is another way of
2953   requesting hidden visibility.  ARM's compiler supports
2954   "__declspec(notshared)"; we support the same thing via an
2955   attribute.  */
2956
2957static tree
2958arm_handle_notshared_attribute (tree *node,
2959				tree name ATTRIBUTE_UNUSED,
2960				tree args ATTRIBUTE_UNUSED,
2961				int flags ATTRIBUTE_UNUSED,
2962				bool *no_add_attrs)
2963{
2964  tree decl = TYPE_NAME (*node);
2965
2966  if (decl)
2967    {
2968      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2969      DECL_VISIBILITY_SPECIFIED (decl) = 1;
2970      *no_add_attrs = false;
2971    }
2972  return NULL_TREE;
2973}
2974#endif
2975
2976/* Return 0 if the attributes for two types are incompatible, 1 if they
2977   are compatible, and 2 if they are nearly compatible (which causes a
2978   warning to be generated).  */
2979static int
2980arm_comp_type_attributes (tree type1, tree type2)
2981{
2982  int l1, l2, s1, s2;
2983
2984  /* Check for mismatch of non-default calling convention.  */
2985  if (TREE_CODE (type1) != FUNCTION_TYPE)
2986    return 1;
2987
2988  /* Check for mismatched call attributes.  */
2989  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2990  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2991  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2992  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2993
2994  /* Only bother to check if an attribute is defined.  */
2995  if (l1 | l2 | s1 | s2)
2996    {
2997      /* If one type has an attribute, the other must have the same attribute.  */
2998      if ((l1 != l2) || (s1 != s2))
2999	return 0;
3000
3001      /* Disallow mixed attributes.  */
3002      if ((l1 & s2) || (l2 & s1))
3003	return 0;
3004    }
3005
3006  /* Check for mismatched ISR attribute.  */
3007  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3008  if (! l1)
3009    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3010  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3011  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3013  if (l1 != l2)
3014    return 0;
3015
3016  return 1;
3017}
3018
3019/*  Encode long_call or short_call attribute by prefixing
3020    symbol name in DECL with a special character FLAG.  */
3021void
3022arm_encode_call_attribute (tree decl, int flag)
3023{
3024  const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
3025  int          len = strlen (str);
3026  char *       newstr;
3027
3028  /* Do not allow weak functions to be treated as short call.  */
3029  if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
3030    return;
3031
3032  newstr = alloca (len + 2);
3033  newstr[0] = flag;
3034  strcpy (newstr + 1, str);
3035
3036  newstr = (char *) ggc_alloc_string (newstr, len + 1);
3037  XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
3038}
3039
/*  Assign default attributes to a newly defined type.  This is used to
3041    set short_call/long_call attributes for function types of
3042    functions defined inside corresponding #pragma scopes.  */
3043static void
3044arm_set_default_type_attributes (tree type)
3045{
  /* Add __attribute__ ((long_call)) to all functions when inside
     #pragma long_calls, or __attribute__ ((short_call)) when inside
     #pragma no_long_calls.  */
3049  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3050    {
3051      tree type_attr_list, attr_name;
3052      type_attr_list = TYPE_ATTRIBUTES (type);
3053
3054      if (arm_pragma_long_calls == LONG)
3055 	attr_name = get_identifier ("long_call");
3056      else if (arm_pragma_long_calls == SHORT)
3057 	attr_name = get_identifier ("short_call");
3058      else
3059 	return;
3060
3061      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3062      TYPE_ATTRIBUTES (type) = type_attr_list;
3063    }
3064}
3065
3066/* Return 1 if the operand is a SYMBOL_REF for a function known to be
3067   defined within the current compilation unit.  If this cannot be
3068   determined, then 0 is returned.  */
3069static int
3070current_file_function_operand (rtx sym_ref)
3071{
3072  /* This is a bit of a fib.  A function will have a short call flag
3073     applied to its name if it has the short call attribute, or it has
3074     already been defined within the current compilation unit.  */
3075  if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
3076    return 1;
3077
3078  /* The current function is always defined within the current compilation
     unit.  If it is a weak definition, however, then this may not be the real
3080     definition of the function, and so we have to say no.  */
3081  if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
3082      && !DECL_WEAK (current_function_decl))
3083    return 1;
3084
3085  /* We cannot make the determination - default to returning 0.  */
3086  return 0;
3087}
3088
3089/* Return nonzero if a 32 bit "long_call" should be generated for
3090   this call.  We generate a long_call if the function:
3091
        a.  has an __attribute__ ((long_call))
3093     or b.  is within the scope of a #pragma long_calls
3094     or c.  the -mlong-calls command line switch has been specified
3095         .  and either:
3096                1. -ffunction-sections is in effect
3097	     or 2. the current function has __attribute__ ((section))
3098	     or 3. the target function has __attribute__ ((section))
3099
3100   However we do not generate a long call if the function:
3101
3102        d.  has an __attribute__ ((short_call))
3103     or e.  is inside the scope of a #pragma no_long_calls
3104     or f.  is defined within the current compilation unit.
3105
3106   This function will be called by C fragments contained in the machine
3107   description file.  SYM_REF and CALL_COOKIE correspond to the matched
3108   rtl operands.  CALL_SYMBOL is used to distinguish between
3109   two different callers of the function.  It is set to 1 in the
3110   "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
3111   and "call_value" patterns.  This is because of the difference in the
3112   SYM_REFs passed by these patterns.  */
3113int
3114arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3115{
3116  if (!call_symbol)
3117    {
3118      if (GET_CODE (sym_ref) != MEM)
3119	return 0;
3120
3121      sym_ref = XEXP (sym_ref, 0);
3122    }
3123
3124  if (GET_CODE (sym_ref) != SYMBOL_REF)
3125    return 0;
3126
3127  if (call_cookie & CALL_SHORT)
3128    return 0;
3129
3130  if (TARGET_LONG_CALLS)
3131    {
3132      if (flag_function_sections
3133	  || DECL_SECTION_NAME (current_function_decl))
3134	/* c.3 is handled by the definition of the
3135	   ARM_DECLARE_FUNCTION_SIZE macro.  */
3136	return 1;
3137    }
3138
3139  if (current_file_function_operand (sym_ref))
3140    return 0;
3141
3142  return (call_cookie & CALL_LONG)
3143    || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3144    || TARGET_LONG_CALLS;
3145}
3146
3147/* Return nonzero if it is ok to make a tail-call to DECL.  */
3148static bool
3149arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3150{
3151  int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3152
3153  if (cfun->machine->sibcall_blocked)
3154    return false;
3155
3156  /* Never tailcall something for which we have no decl, or if we
3157     are in Thumb mode.  */
3158  if (decl == NULL || TARGET_THUMB)
3159    return false;
3160
3161  /* Get the calling method.  */
3162  if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3163    call_type = CALL_SHORT;
3164  else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3165    call_type = CALL_LONG;
3166
3167  /* Cannot tail-call to long calls, since these are out of range of
3168     a branch instruction.  However, if not compiling PIC, we know
3169     we can reach the symbol if it is in this compilation unit.  */
3170  if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3171    return false;
3172
3173  /* If we are interworking and the function is not declared static
3174     then we can't tail-call it unless we know that it exists in this
3175     compilation unit (since it might be a Thumb routine).  */
3176  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3177    return false;
3178
3179  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
3180  if (IS_INTERRUPT (arm_current_func_type ()))
3181    return false;
3182
3183  /* Everything else is ok.  */
3184  return true;
3185}
3186
3187
3188/* Addressing mode support functions.  */
3189
3190/* Return nonzero if X is a legitimate immediate operand when compiling
3191   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
3192int
3193legitimate_pic_operand_p (rtx x)
3194{
3195  if (GET_CODE (x) == SYMBOL_REF
3196      || (GET_CODE (x) == CONST
3197	  && GET_CODE (XEXP (x, 0)) == PLUS
3198	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3199    return 0;
3200
3201  return 1;
3202}
3203
3204rtx
3205legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3206{
3207  if (GET_CODE (orig) == SYMBOL_REF
3208      || GET_CODE (orig) == LABEL_REF)
3209    {
3210#ifndef AOF_ASSEMBLER
3211      rtx pic_ref, address;
3212#endif
3213      rtx insn;
3214      int subregs = 0;
3215
3216      /* If this function doesn't have a pic register, create one now.
3217	 A lot of the logic here is made obscure by the fact that this
3218	 routine gets called as part of the rtx cost estimation
3219	 process.  We don't want those calls to affect any assumptions
3220	 about the real function; and further, we can't call
3221	 entry_of_function() until we start the real expansion
3222	 process.  */
3223      if (!current_function_uses_pic_offset_table)
3224	{
3225	  gcc_assert (!no_new_pseudos);
3226	  if (arm_pic_register != INVALID_REGNUM)
3227	    {
3228	      if (!cfun->machine->pic_reg)
3229		cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3230
3231	      /* Play games to avoid marking the function as needing pic
3232		 if we are being called as part of the cost-estimation
3233		 process.  */
	      if (!ir_type ())
3235		current_function_uses_pic_offset_table = 1;
3236	    }
3237	  else
3238	    {
3239	      rtx seq;
3240
3241	      if (!cfun->machine->pic_reg)
3242		  cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3243
3244	      /* Play games to avoid marking the function as needing pic
3245		 if we are being called as part of the cost-estimation
3246		 process.  */
	      if (!ir_type ())
3248		{
3249		  current_function_uses_pic_offset_table = 1;
3250		  start_sequence ();
3251
3252		  arm_load_pic_register (0UL);
3253
3254		  seq = get_insns ();
3255		  end_sequence ();
3256		  emit_insn_after (seq, entry_of_function ());
3257		}
3258	    }
3259	}
3260
3261      if (reg == 0)
3262	{
3263	  gcc_assert (!no_new_pseudos);
3264	  reg = gen_reg_rtx (Pmode);
3265
3266	  subregs = 1;
3267	}
3268
3269#ifdef AOF_ASSEMBLER
3270      /* The AOF assembler can generate relocations for these directly, and
3271	 understands that the PIC register has to be added into the offset.  */
3272      insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3273#else
3274      if (subregs)
3275	address = gen_reg_rtx (Pmode);
3276      else
3277	address = reg;
3278
3279      if (TARGET_ARM)
3280	emit_insn (gen_pic_load_addr_arm (address, orig));
3281      else
3282	emit_insn (gen_pic_load_addr_thumb (address, orig));
3283
3284      if ((GET_CODE (orig) == LABEL_REF
3285	   || (GET_CODE (orig) == SYMBOL_REF &&
3286	       SYMBOL_REF_LOCAL_P (orig)))
3287	  && NEED_GOT_RELOC)
3288	pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3289      else
3290	{
3291	  pic_ref = gen_const_mem (Pmode,
3292				   gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3293					         address));
3294	}
3295
3296      insn = emit_move_insn (reg, pic_ref);
3297#endif
3298      /* Put a REG_EQUAL note on this insn, so that it can be optimized
3299	 by loop.  */
3300      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3301					    REG_NOTES (insn));
3302      return reg;
3303    }
3304  else if (GET_CODE (orig) == CONST)
3305    {
3306      rtx base, offset;
3307
3308      if (GET_CODE (XEXP (orig, 0)) == PLUS
3309	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3310	return orig;
3311
3312      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3313	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3314	return orig;
3315
3316      if (reg == 0)
3317	{
3318	  gcc_assert (!no_new_pseudos);
3319	  reg = gen_reg_rtx (Pmode);
3320	}
3321
3322      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3323
3324      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3325      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3326				       base == reg ? 0 : reg);
3327
3328      if (GET_CODE (offset) == CONST_INT)
3329	{
3330	  /* The base register doesn't really matter, we only want to
3331	     test the index for the appropriate mode.  */
3332	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
3333	    {
3334	      gcc_assert (!no_new_pseudos);
3335	      offset = force_reg (Pmode, offset);
3336	    }
3337
3338	  if (GET_CODE (offset) == CONST_INT)
3339	    return plus_constant (base, INTVAL (offset));
3340	}
3341
3342      if (GET_MODE_SIZE (mode) > 4
3343	  && (GET_MODE_CLASS (mode) == MODE_INT
3344	      || TARGET_SOFT_FLOAT))
3345	{
3346	  emit_insn (gen_addsi3 (reg, base, offset));
3347	  return reg;
3348	}
3349
3350      return gen_rtx_PLUS (Pmode, base, offset);
3351    }
3352
3353  return orig;
3354}
3355
3356
3357/* Find a spare low register to use during the prolog of a function.  */
3358
3359static int
3360thumb_find_work_register (unsigned long pushed_regs_mask)
3361{
3362  int reg;
3363
3364  /* Check the argument registers first as these are call-used.  The
3365     register allocation order means that sometimes r3 might be used
3366     but earlier argument registers might not, so check them all.  */
3367  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3368    if (!regs_ever_live[reg])
3369      return reg;
3370
  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have fewer than 4
     registers' worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): i.e. current_function_pretend_args_size is greater than 0.  */
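  /* For example, with "int f (int n, ...)" only r0 carries a named
     argument, so the varargs prologue pushes r1-r3 as anonymous arguments
     and r3 is free for the remainder of the prologue.  */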
3379  if (cfun->machine->uses_anonymous_args
3380      && current_function_pretend_args_size > 0)
3381    return LAST_ARG_REGNUM;
3382
  /* The other case is when we have fixed arguments but fewer than 4
     registers' worth.  In this case r3 might be used in the body of the
     function, but it is not being used to convey an argument into the
     function.  In theory we could just check current_function_args_size to
     see how many bytes are being passed in argument registers, but it seems
     to be unreliable.  Sometimes it will have the value 0 when in fact
     arguments are being passed.  (See testcase execute/20021111-1.c for an
     example.)  So we check the args_info.nregs field as well.  The problem
     with this field is that it makes no allowances for arguments that are
     passed to the function but which are not used.  Hence we could miss an
     opportunity when a function has an unused argument in r3.  But it is
     better to be safe than sorry.  */
3395  if (! cfun->machine->uses_anonymous_args
3396      && current_function_args_size >= 0
3397      && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3398      && cfun->args_info.nregs < 4)
3399    return LAST_ARG_REGNUM;
3400
3401  /* Otherwise look for a call-saved register that is going to be pushed.  */
3402  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3403    if (pushed_regs_mask & (1 << reg))
3404      return reg;
3405
3406  /* Something went wrong - thumb_compute_save_reg_mask()
3407     should have arranged for a suitable register to be pushed.  */
3408  gcc_unreachable ();
3409}
3410
3411static GTY(()) int pic_labelno;
3412
/* Generate code to load the PIC register.  In Thumb mode SAVED_REGS is
   the mask of registers pushed in the prologue, from which a scratch low
   register can be chosen.  */
3415
3416void
3417arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3418{
3419#ifndef AOF_ASSEMBLER
3420  rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx;
3421  rtx global_offset_table;
3422
3423  if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3424    return;
3425
3426  gcc_assert (flag_pic);
3427
3428  /* We use an UNSPEC rather than a LABEL_REF because this label never appears
3429     in the code stream.  */
3430
3431  labelno = GEN_INT (pic_labelno++);
3432  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3433  l1 = gen_rtx_CONST (VOIDmode, l1);
3434
3435  global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition; on the Thumb it is 'dot + 4'.  */
3438  pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3439  if (GOT_PCREL)
3440    pic_tmp2 = gen_rtx_CONST (VOIDmode,
3441			    gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3442  else
3443    pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3444
3445  pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3446
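  /* A sketch of what the ARM branch below emits (label and register
     names are illustrative):

	 ldr	rPIC, =_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
     .LPIC0:
	 add	rPIC, pc, rPIC

     After the add, rPIC holds the address of the GOT.  */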
3447  if (TARGET_ARM)
3448    {
3449      emit_insn (gen_pic_load_addr_arm (cfun->machine->pic_reg, pic_rtx));
3450      emit_insn (gen_pic_add_dot_plus_eight (cfun->machine->pic_reg,
3451					     cfun->machine->pic_reg, labelno));
3452    }
3453  else
3454    {
3455      if (arm_pic_register != INVALID_REGNUM
3456	  && REGNO (cfun->machine->pic_reg) > LAST_LO_REGNUM)
3457	{
3458	  /* We will have pushed the pic register, so we should always be
3459	     able to find a work register.  */
3460	  pic_tmp = gen_rtx_REG (SImode,
3461				 thumb_find_work_register (saved_regs));
3462	  emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3463	  emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3464	}
3465      else
3466	emit_insn (gen_pic_load_addr_thumb (cfun->machine->pic_reg, pic_rtx));
3467      emit_insn (gen_pic_add_dot_plus_four (cfun->machine->pic_reg,
3468					    cfun->machine->pic_reg, labelno));
3469    }
3470
  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can invalidate the liveness information.  */
3473  emit_insn (gen_rtx_USE (VOIDmode, cfun->machine->pic_reg));
3474#endif /* AOF_ASSEMBLER */
3475}
3476
3477
3478/* Return nonzero if X is valid as an ARM state addressing register.  */
3479static int
3480arm_address_register_rtx_p (rtx x, int strict_p)
3481{
3482  int regno;
3483
3484  if (GET_CODE (x) != REG)
3485    return 0;
3486
3487  regno = REGNO (x);
3488
3489  if (strict_p)
3490    return ARM_REGNO_OK_FOR_BASE_P (regno);
3491
3492  return (regno <= LAST_ARM_REGNUM
3493	  || regno >= FIRST_PSEUDO_REGISTER
3494	  || regno == FRAME_POINTER_REGNUM
3495	  || regno == ARG_POINTER_REGNUM);
3496}
3497
3498/* Return TRUE if this rtx is the difference of a symbol and a label,
3499   and will reduce to a PC-relative relocation in the object file.
3500   Expressions like this can be left alone when generating PIC, rather
3501   than forced through the GOT.  */
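/* For example, (const (minus (symbol_ref "sym") (label_ref L))) assembles
   to "sym - .L", which the linker can resolve without a GOT entry.  */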
3502static int
3503pcrel_constant_p (rtx x)
3504{
3505  if (GET_CODE (x) == MINUS)
3506    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3507
3508  return FALSE;
3509}
3510
3511/* Return nonzero if X is a valid ARM state address operand.  */
3512int
3513arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3514			  int strict_p)
3515{
3516  bool use_ldrd;
3517  enum rtx_code code = GET_CODE (x);
3518
3519  if (arm_address_register_rtx_p (x, strict_p))
3520    return 1;
3521
3522  use_ldrd = (TARGET_LDRD
3523	      && (mode == DImode
3524		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3525
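  /* The auto-modify codes below correspond to addresses such as
     "ldr rD, [rN], #4" (post-indexed) and "ldr rD, [rN, #-4]!"
     (pre-indexed, writeback); in RTL the offset is implied by the
     access size.  */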
3526  if (code == POST_INC || code == PRE_DEC
3527      || ((code == PRE_INC || code == POST_DEC)
3528	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3529    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3530
3531  else if ((code == POST_MODIFY || code == PRE_MODIFY)
3532	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3533	   && GET_CODE (XEXP (x, 1)) == PLUS
3534	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3535    {
3536      rtx addend = XEXP (XEXP (x, 1), 1);
3537
      /* Don't allow ldrd post-increment by register because it's hard
	 to fix up invalid register choices.  */
3540      if (use_ldrd
3541	  && GET_CODE (x) == POST_MODIFY
3542	  && GET_CODE (addend) == REG)
3543	return 0;
3544
3545      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3546	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
3547    }
3548
  /* After reload, constants split out into minipools will be addressed
     via a LABEL_REF.  */
3551  else if (reload_completed
3552	   && (code == LABEL_REF
3553	       || (code == CONST
3554		   && GET_CODE (XEXP (x, 0)) == PLUS
3555		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3556		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3557    return 1;
3558
3559  else if (mode == TImode)
3560    return 0;
3561
3562  else if (code == PLUS)
3563    {
3564      rtx xop0 = XEXP (x, 0);
3565      rtx xop1 = XEXP (x, 1);
3566
3567      return ((arm_address_register_rtx_p (xop0, strict_p)
3568	       && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3569	      || (arm_address_register_rtx_p (xop1, strict_p)
3570		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3571    }
3572
3573#if 0
3574  /* Reload currently can't handle MINUS, so disable this for now */
3575  else if (GET_CODE (x) == MINUS)
3576    {
3577      rtx xop0 = XEXP (x, 0);
3578      rtx xop1 = XEXP (x, 1);
3579
3580      return (arm_address_register_rtx_p (xop0, strict_p)
3581	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3582    }
3583#endif
3584
3585  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3586	   && code == SYMBOL_REF
3587	   && CONSTANT_POOL_ADDRESS_P (x)
3588	   && ! (flag_pic
3589		 && symbol_mentioned_p (get_pool_constant (x))
3590		 && ! pcrel_constant_p (get_pool_constant (x))))
3591    return 1;
3592
3593  return 0;
3594}
3595
3596/* Return nonzero if INDEX is valid for an address index operand in
3597   ARM state.  */
3598static int
3599arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3600			int strict_p)
3601{
3602  HOST_WIDE_INT range;
3603  enum rtx_code code = GET_CODE (index);
3604
3605  /* Standard coprocessor addressing modes.  */
3606  if (TARGET_HARD_FLOAT
3607      && (TARGET_FPA || TARGET_MAVERICK)
3608      && (GET_MODE_CLASS (mode) == MODE_FLOAT
3609	  || (TARGET_MAVERICK && mode == DImode)))
3610    return (code == CONST_INT && INTVAL (index) < 1024
3611	    && INTVAL (index) > -1024
3612	    && (INTVAL (index) & 3) == 0);
3613
3614  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3615    {
3616      /* For DImode assume values will usually live in core regs
3617	 and only allow LDRD addressing modes.  */
3618      if (!TARGET_LDRD || mode != DImode)
3619	return (code == CONST_INT
3620		&& INTVAL (index) < 1024
3621		&& INTVAL (index) > -1024
3622		&& (INTVAL (index) & 3) == 0);
3623    }
3624
3625  if (arm_address_register_rtx_p (index, strict_p)
3626      && (GET_MODE_SIZE (mode) <= 4))
3627    return 1;
3628
3629  if (mode == DImode || mode == DFmode)
3630    {
3631      if (code == CONST_INT)
3632	{
3633	  HOST_WIDE_INT val = INTVAL (index);
3634
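	  /* LDRD accepts an 8-bit offset, hence -255..255.  Without LDRD
	     the access is split into two LDRs, the second at VAL + 4; the
	     LDR offset field is 12 bits, so VAL can be at most
	     4095 - 4 = 4091.  */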
3635	  if (TARGET_LDRD)
3636	    return val > -256 && val < 256;
3637	  else
3638	    return val > -4096 && val < 4092;
3639	}
3640
3641      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3642    }
3643
3644  if (GET_MODE_SIZE (mode) <= 4
3645      && ! (arm_arch4
3646	    && (mode == HImode
3647		|| (mode == QImode && outer == SIGN_EXTEND))))
3648    {
3649      if (code == MULT)
3650	{
3651	  rtx xiop0 = XEXP (index, 0);
3652	  rtx xiop1 = XEXP (index, 1);
3653
3654	  return ((arm_address_register_rtx_p (xiop0, strict_p)
3655		   && power_of_two_operand (xiop1, SImode))
3656		  || (arm_address_register_rtx_p (xiop1, strict_p)
3657		      && power_of_two_operand (xiop0, SImode)));
3658	}
3659      else if (code == LSHIFTRT || code == ASHIFTRT
3660	       || code == ASHIFT || code == ROTATERT)
3661	{
3662	  rtx op = XEXP (index, 1);
3663
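	  /* These accept the scaled-register address forms, e.g.
	     "ldr rD, [rN, rM, lsl #2]", where the shift amount is an
	     immediate in the range 1-31.  */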
3664	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3665		  && GET_CODE (op) == CONST_INT
3666		  && INTVAL (op) > 0
3667		  && INTVAL (op) <= 31);
3668	}
3669    }
3670
3671  /* For ARM v4 we may be doing a sign-extend operation during the
3672     load.  */
3673  if (arm_arch4)
3674    {
3675      if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3676	range = 256;
3677      else
3678	range = 4096;
3679    }
3680  else
3681    range = (mode == HImode) ? 4095 : 4096;
3682
3683  return (code == CONST_INT
3684	  && INTVAL (index) < range
3685	  && INTVAL (index) > -range);
3686}
3687
3688/* Return nonzero if X is valid as a Thumb state base register.  */
3689static int
3690thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3691{
3692  int regno;
3693
3694  if (GET_CODE (x) != REG)
3695    return 0;
3696
3697  regno = REGNO (x);
3698
3699  if (strict_p)
3700    return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3701
3702  return (regno <= LAST_LO_REGNUM
3703	  || regno > LAST_VIRTUAL_REGISTER
3704	  || regno == FRAME_POINTER_REGNUM
3705	  || (GET_MODE_SIZE (mode) >= 4
3706	      && (regno == STACK_POINTER_REGNUM
3707		  || regno >= FIRST_PSEUDO_REGISTER
3708		  || x == hard_frame_pointer_rtx
3709		  || x == arg_pointer_rtx)));
3710}
3711
3712/* Return nonzero if x is a legitimate index register.  This is the case
3713   for any base register that can access a QImode object.  */
3714inline static int
3715thumb_index_register_rtx_p (rtx x, int strict_p)
3716{
3717  return thumb_base_register_rtx_p (x, QImode, strict_p);
3718}
3719
3720/* Return nonzero if x is a legitimate Thumb-state address.
3721
3722   The AP may be eliminated to either the SP or the FP, so we use the
3723   least common denominator, e.g. SImode, and offsets from 0 to 64.
3724
3725   ??? Verify whether the above is the right approach.
3726
3727   ??? Also, the FP may be eliminated to the SP, so perhaps that
3728   needs special handling also.
3729
3730   ??? Look at how the mips16 port solves this problem.  It probably uses
3731   better ways to solve some of these problems.
3732
   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack-based ones won't produce impossible code.  */
3737int
3738thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3739{
3740  /* ??? Not clear if this is right.  Experiment.  */
3741  if (GET_MODE_SIZE (mode) < 4
3742      && !(reload_in_progress || reload_completed)
3743      && (reg_mentioned_p (frame_pointer_rtx, x)
3744	  || reg_mentioned_p (arg_pointer_rtx, x)
3745	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
3746	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3747	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3748	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3749    return 0;
3750
3751  /* Accept any base register.  SP only in SImode or larger.  */
3752  else if (thumb_base_register_rtx_p (x, mode, strict_p))
3753    return 1;
3754
3755  /* This is PC relative data before arm_reorg runs.  */
3756  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3757	   && GET_CODE (x) == SYMBOL_REF
3758           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
3759    return 1;
3760
3761  /* This is PC relative data after arm_reorg runs.  */
3762  else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3763	   && (GET_CODE (x) == LABEL_REF
3764	       || (GET_CODE (x) == CONST
3765		   && GET_CODE (XEXP (x, 0)) == PLUS
3766		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3767		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3768    return 1;
3769
3770  /* Post-inc indexing only supported for SImode and larger.  */
3771  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3772	   && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3773    return 1;
3774
3775  else if (GET_CODE (x) == PLUS)
3776    {
3777      /* REG+REG address can be any two index registers.  */
3778      /* We disallow FRAME+REG addressing since we know that FRAME
3779	 will be replaced with STACK, and SP relative addressing only
3780	 permits SP+OFFSET.  */
3781      if (GET_MODE_SIZE (mode) <= 4
3782	  && XEXP (x, 0) != frame_pointer_rtx
3783	  && XEXP (x, 1) != frame_pointer_rtx
3784	  && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3785	  && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3786	return 1;
3787
3788      /* REG+const has 5-7 bit offset for non-SP registers.  */
3789      else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3790		|| XEXP (x, 0) == arg_pointer_rtx)
3791	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3792	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3793	return 1;
3794
3795      /* REG+const has 10 bit offset for SP, but only SImode and
3796	 larger is supported.  */
3797      /* ??? Should probably check for DI/DFmode overflow here
3798	 just like GO_IF_LEGITIMATE_OFFSET does.  */
3799      else if (GET_CODE (XEXP (x, 0)) == REG
3800	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3801	       && GET_MODE_SIZE (mode) >= 4
3802	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3803	       && INTVAL (XEXP (x, 1)) >= 0
3804	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3805	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
3806	return 1;
3807
3808      else if (GET_CODE (XEXP (x, 0)) == REG
3809	       && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3810	       && GET_MODE_SIZE (mode) >= 4
3811	       && GET_CODE (XEXP (x, 1)) == CONST_INT
3812	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
3813	return 1;
3814    }
3815
3816  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3817	   && GET_MODE_SIZE (mode) == 4
3818	   && GET_CODE (x) == SYMBOL_REF
3819	   && CONSTANT_POOL_ADDRESS_P (x)
3820	   && ! (flag_pic
3821		 && symbol_mentioned_p (get_pool_constant (x))
3822		 && ! pcrel_constant_p (get_pool_constant (x))))
3823    return 1;
3824
3825  return 0;
3826}
3827
3828/* Return nonzero if VAL can be used as an offset in a Thumb-state address
3829   instruction of mode MODE.  */
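/* These ranges follow from the Thumb 5-bit immediate offset field, which
   is scaled by the access size: ldrb reaches 0-31, ldrh 0-62 in steps of
   2, and ldr 0-124 in steps of 4.  */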
3830int
3831thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3832{
3833  switch (GET_MODE_SIZE (mode))
3834    {
3835    case 1:
3836      return val >= 0 && val < 32;
3837
3838    case 2:
3839      return val >= 0 && val < 64 && (val & 1) == 0;
3840
3841    default:
3842      return (val >= 0
3843	      && (val + GET_MODE_SIZE (mode)) <= 128
3844	      && (val & 3) == 0);
3845    }
3846}
3847
3848/* Build the SYMBOL_REF for __tls_get_addr.  */
3849
3850static GTY(()) rtx tls_get_addr_libfunc;
3851
3852static rtx
3853get_tls_get_addr (void)
3854{
3855  if (!tls_get_addr_libfunc)
3856    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
3857  return tls_get_addr_libfunc;
3858}
3859
3860static rtx
3861arm_load_tp (rtx target)
3862{
3863  if (!target)
3864    target = gen_reg_rtx (SImode);
3865
3866  if (TARGET_HARD_TP)
3867    {
3868      /* Can return in any reg.  */
3869      emit_insn (gen_load_tp_hard (target));
3870    }
3871  else
3872    {
3873      /* Always returned in r0.  Immediately copy the result into a pseudo,
3874	 otherwise other uses of r0 (e.g. setting up function arguments) may
3875	 clobber the value.  */
3876
3877      rtx tmp;
3878
3879      emit_insn (gen_load_tp_soft ());
3880
3881      tmp = gen_rtx_REG (SImode, 0);
3882      emit_move_insn (target, tmp);
3883    }
3884  return target;
3885}
3886
3887static rtx
3888load_tls_operand (rtx x, rtx reg)
3889{
3890  rtx tmp;
3891
3892  if (reg == NULL_RTX)
3893    reg = gen_reg_rtx (SImode);
3894
3895  tmp = gen_rtx_CONST (SImode, x);
3896
3897  emit_move_insn (reg, tmp);
3898
3899  return reg;
3900}
3901
3902static rtx
3903arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
3904{
3905  rtx insns, label, labelno, sum;
3906
3907  start_sequence ();
3908
3909  labelno = GEN_INT (pic_labelno++);
3910  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3911  label = gen_rtx_CONST (VOIDmode, label);
3912
3913  sum = gen_rtx_UNSPEC (Pmode,
3914			gen_rtvec (4, x, GEN_INT (reloc), label,
3915				   GEN_INT (TARGET_ARM ? 8 : 4)),
3916			UNSPEC_TLS);
3917  reg = load_tls_operand (sum, reg);
3918
3919  if (TARGET_ARM)
3920    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
3921  else
3922    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
3923
  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);
3926
3927  insns = get_insns ();
3928  end_sequence ();
3929
3930  return insns;
3931}
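
/* A sketch of the sequence the above constructs, in ARM state (label and
   register names are illustrative):

	ldr	r0, .L2
   .LPIC0:
	add	r0, pc, r0
	bl	__tls_get_addr
	...
   .L2:
	.word	x(tlsgd) + (. - (.LPIC0 + 8))

   leaving the result in r0.  */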
3932
3933rtx
3934legitimize_tls_address (rtx x, rtx reg)
3935{
3936  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
3937  unsigned int model = SYMBOL_REF_TLS_MODEL (x);
3938
3939  switch (model)
3940    {
3941    case TLS_MODEL_GLOBAL_DYNAMIC:
3942      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
3943      dest = gen_reg_rtx (Pmode);
3944      emit_libcall_block (insns, dest, ret, x);
3945      return dest;
3946
3947    case TLS_MODEL_LOCAL_DYNAMIC:
3948      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
3949
3950      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
3951	 share the LDM result with other LD model accesses.  */
3952      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
3953			    UNSPEC_TLS);
3954      dest = gen_reg_rtx (Pmode);
3955      emit_libcall_block (insns, dest, ret, eqv);
3956
3957      /* Load the addend.  */
3958      addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
3959			       UNSPEC_TLS);
3960      addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
3961      return gen_rtx_PLUS (Pmode, dest, addend);
3962
3963    case TLS_MODEL_INITIAL_EXEC:
3964      labelno = GEN_INT (pic_labelno++);
3965      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3966      label = gen_rtx_CONST (VOIDmode, label);
3967      sum = gen_rtx_UNSPEC (Pmode,
3968			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
3969				       GEN_INT (TARGET_ARM ? 8 : 4)),
3970			    UNSPEC_TLS);
3971      reg = load_tls_operand (sum, reg);
3972
3973      if (TARGET_ARM)
3974	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
3975      else
3976	{
3977	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
3978	  emit_move_insn (reg, gen_const_mem (SImode, reg));
3979	}
3980
3981      tp = arm_load_tp (NULL_RTX);
3982
3983      return gen_rtx_PLUS (Pmode, tp, reg);
3984
3985    case TLS_MODEL_LOCAL_EXEC:
3986      tp = arm_load_tp (NULL_RTX);
3987
3988      reg = gen_rtx_UNSPEC (Pmode,
3989			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
3990			    UNSPEC_TLS);
3991      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
3992
3993      return gen_rtx_PLUS (Pmode, tp, reg);
3994
3995    default:
      gcc_unreachable ();
3997    }
3998}
3999
4000/* Try machine-dependent ways of modifying an illegitimate address
4001   to be legitimate.  If we find one, return the new, valid address.  */
4002rtx
4003arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4004{
4005  if (arm_tls_symbol_p (x))
4006    return legitimize_tls_address (x, NULL_RTX);
4007
4008  if (GET_CODE (x) == PLUS)
4009    {
4010      rtx xop0 = XEXP (x, 0);
4011      rtx xop1 = XEXP (x, 1);
4012
4013      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4014	xop0 = force_reg (SImode, xop0);
4015
4016      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4017	xop1 = force_reg (SImode, xop1);
4018
4019      if (ARM_BASE_REGISTER_RTX_P (xop0)
4020	  && GET_CODE (xop1) == CONST_INT)
4021	{
4022	  HOST_WIDE_INT n, low_n;
4023	  rtx base_reg, val;
4024	  n = INTVAL (xop1);
4025
4026	  /* VFP addressing modes actually allow greater offsets, but for
4027	     now we just stick with the lowest common denominator.  */
4028	  if (mode == DImode
4029	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4030	    {
4031	      low_n = n & 0x0f;
4032	      n &= ~0x0f;
4033	      if (low_n > 4)
4034		{
4035		  n += 16;
4036		  low_n -= 16;
4037		}
4038	    }
4039	  else
4040	    {
	      low_n = (mode == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4043	      n -= low_n;
4044	    }
4045
4046	  base_reg = gen_reg_rtx (SImode);
4047	  val = force_operand (plus_constant (xop0, n), NULL_RTX);
4048	  emit_move_insn (base_reg, val);
4049	  x = plus_constant (base_reg, low_n);
4050	}
4051      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4052	x = gen_rtx_PLUS (SImode, xop0, xop1);
4053    }
4054
4055  /* XXX We don't allow MINUS any more -- see comment in
4056     arm_legitimate_address_p ().  */
4057  else if (GET_CODE (x) == MINUS)
4058    {
4059      rtx xop0 = XEXP (x, 0);
4060      rtx xop1 = XEXP (x, 1);
4061
4062      if (CONSTANT_P (xop0))
4063	xop0 = force_reg (SImode, xop0);
4064
4065      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4066	xop1 = force_reg (SImode, xop1);
4067
4068      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4069	x = gen_rtx_MINUS (SImode, xop0, xop1);
4070    }
4071
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to be
     shared between multiple adjacent memory references; it might even
     allow the minipool to be avoided entirely.  */
4076  else if (GET_CODE (x) == CONST_INT && optimize > 0)
4077    {
4078      unsigned int bits;
4079      HOST_WIDE_INT mask, base, index;
4080      rtx base_reg;
4081
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only
         and hope that arm_gen_constant will enable ldrb to use more
         bits.  */
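      /* For instance, an SImode load from the constant address 0x2345678
	 has mask 0xfff, so base = 0x2345000 is built in a register and the
	 access becomes roughly "ldr rD, [rB, #0x678]".  */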
4085      bits = (mode == SImode) ? 12 : 8;
4086      mask = (1 << bits) - 1;
4087      base = INTVAL (x) & ~mask;
4088      index = INTVAL (x) & mask;
4089      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4090        {
4091	  /* It'll most probably be more efficient to generate the base
4092	     with more bits set and use a negative index instead. */
4093	  base |= mask;
4094	  index -= mask;
4095	}
4096      base_reg = force_reg (SImode, GEN_INT (base));
4097      x = plus_constant (base_reg, index);
4098    }
4099
4100  if (flag_pic)
4101    {
4102      /* We need to find and carefully transform any SYMBOL and LABEL
4103	 references; so go back to the original address expression.  */
4104      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4105
4106      if (new_x != orig_x)
4107	x = new_x;
4108    }
4109
4110  return x;
4111}
4112
4113
4114/* Try machine-dependent ways of modifying an illegitimate Thumb address
4115   to be legitimate.  If we find one, return the new, valid address.  */
4116rtx
4117thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4118{
4119  if (arm_tls_symbol_p (x))
4120    return legitimize_tls_address (x, NULL_RTX);
4121
4122  if (GET_CODE (x) == PLUS
4123      && GET_CODE (XEXP (x, 1)) == CONST_INT
4124      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4125	  || INTVAL (XEXP (x, 1)) < 0))
4126    {
4127      rtx xop0 = XEXP (x, 0);
4128      rtx xop1 = XEXP (x, 1);
4129      HOST_WIDE_INT offset = INTVAL (xop1);
4130
4131      /* Try and fold the offset into a biasing of the base register and
4132	 then offsetting that.  Don't do this when optimizing for space
4133	 since it can cause too many CSEs.  */
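      /* For example, an SImode access at base + 260 is rebased to
	 base' = base + 252 followed by "ldr rD, [rB', #8]", both of which
	 are Thumb-encodable.  */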
4134      if (optimize_size && offset >= 0
4135	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
4136	{
4137	  HOST_WIDE_INT delta;
4138
4139	  if (offset >= 256)
4140	    delta = offset - (256 - GET_MODE_SIZE (mode));
4141	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4142	    delta = 31 * GET_MODE_SIZE (mode);
4143	  else
4144	    delta = offset & (~31 * GET_MODE_SIZE (mode));
4145
4146	  xop0 = force_operand (plus_constant (xop0, offset - delta),
4147				NULL_RTX);
4148	  x = plus_constant (xop0, delta);
4149	}
4150      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
4154	x = force_operand (x, NULL_RTX);
4155      else
4156	{
4157	  /* For the remaining cases, force the constant into a register.  */
4158	  xop1 = force_reg (SImode, xop1);
4159	  x = gen_rtx_PLUS (SImode, xop0, xop1);
4160	}
4161    }
4162  else if (GET_CODE (x) == PLUS
4163	   && s_register_operand (XEXP (x, 1), SImode)
4164	   && !s_register_operand (XEXP (x, 0), SImode))
4165    {
4166      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4167
4168      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4169    }
4170
4171  if (flag_pic)
4172    {
4173      /* We need to find and carefully transform any SYMBOL and LABEL
4174	 references; so go back to the original address expression.  */
4175      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4176
4177      if (new_x != orig_x)
4178	x = new_x;
4179    }
4180
4181  return x;
4182}
4183
4184rtx
4185thumb_legitimize_reload_address (rtx *x_p,
4186				 enum machine_mode mode,
4187				 int opnum, int type,
4188				 int ind_levels ATTRIBUTE_UNUSED)
4189{
4190  rtx x = *x_p;
4191
4192  if (GET_CODE (x) == PLUS
4193      && GET_MODE_SIZE (mode) < 4
4194      && REG_P (XEXP (x, 0))
4195      && XEXP (x, 0) == stack_pointer_rtx
4196      && GET_CODE (XEXP (x, 1)) == CONST_INT
4197      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4198    {
4199      rtx orig_x = x;
4200
4201      x = copy_rtx (x);
4202      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4203		   Pmode, VOIDmode, 0, 0, opnum, type);
4204      return x;
4205    }
4206
4207  /* If both registers are hi-regs, then it's better to reload the
4208     entire expression rather than each register individually.  That
4209     only requires one reload register rather than two.  */
4210  if (GET_CODE (x) == PLUS
4211      && REG_P (XEXP (x, 0))
4212      && REG_P (XEXP (x, 1))
4213      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4214      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4215    {
4216      rtx orig_x = x;
4217
4218      x = copy_rtx (x);
4219      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4220		   Pmode, VOIDmode, 0, 0, opnum, type);
4221      return x;
4222    }
4223
4224  return NULL;
4225}
4226
4227/* Test for various thread-local symbols.  */
4228
4229/* Return TRUE if X is a thread-local symbol.  */
4230
4231static bool
4232arm_tls_symbol_p (rtx x)
4233{
4234  if (! TARGET_HAVE_TLS)
4235    return false;
4236
4237  if (GET_CODE (x) != SYMBOL_REF)
4238    return false;
4239
4240  return SYMBOL_REF_TLS_MODEL (x) != 0;
4241}
4242
4243/* Helper for arm_tls_referenced_p.  */
4244
4245static int
4246arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4247{
4248  if (GET_CODE (*x) == SYMBOL_REF)
4249    return SYMBOL_REF_TLS_MODEL (*x) != 0;
4250
4251  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4252     TLS offsets, not real symbol references.  */
4253  if (GET_CODE (*x) == UNSPEC
4254      && XINT (*x, 1) == UNSPEC_TLS)
4255    return -1;
4256
4257  return 0;
4258}
4259
4260/* Return TRUE if X contains any TLS symbol references.  */
4261
4262bool
4263arm_tls_referenced_p (rtx x)
4264{
4265  if (! TARGET_HAVE_TLS)
4266    return false;
4267
4268  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4269}
4270
4271#define REG_OR_SUBREG_REG(X)						\
4272  (GET_CODE (X) == REG							\
4273   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4274
4275#define REG_OR_SUBREG_RTX(X)			\
4276   (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4277
4278#ifndef COSTS_N_INSNS
4279#define COSTS_N_INSNS(N) ((N) * 4 - 2)
4280#endif
4281static inline int
4282thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4283{
4284  enum machine_mode mode = GET_MODE (x);
4285
4286  switch (code)
4287    {
4288    case ASHIFT:
4289    case ASHIFTRT:
4290    case LSHIFTRT:
4291    case ROTATERT:
4292    case PLUS:
4293    case MINUS:
4294    case COMPARE:
4295    case NEG:
4296    case NOT:
4297      return COSTS_N_INSNS (1);
4298
4299    case MULT:
4300      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4301	{
4302	  int cycles = 0;
4303	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4304
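	  /* The loop charges one cycle per two bits of the constant,
	     modelling an early-terminating multiplier; e.g. the constant
	     20 (10100 in binary) takes three iterations, giving
	     COSTS_N_INSNS (2) + 3.  */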
4305	  while (i)
4306	    {
4307	      i >>= 2;
4308	      cycles++;
4309	    }
4310	  return COSTS_N_INSNS (2) + cycles;
4311	}
4312      return COSTS_N_INSNS (1) + 16;
4313
4314    case SET:
      return (COSTS_N_INSNS (1)
	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
		     + (GET_CODE (SET_DEST (x)) == MEM)));
4318
4319    case CONST_INT:
4320      if (outer == SET)
4321	{
4322	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4323	    return 0;
4324	  if (thumb_shiftable_const (INTVAL (x)))
4325	    return COSTS_N_INSNS (2);
4326	  return COSTS_N_INSNS (3);
4327	}
4328      else if ((outer == PLUS || outer == COMPARE)
4329	       && INTVAL (x) < 256 && INTVAL (x) > -256)
4330	return 0;
4331      else if (outer == AND
4332	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
4333	return COSTS_N_INSNS (1);
4334      else if (outer == ASHIFT || outer == ASHIFTRT
4335	       || outer == LSHIFTRT)
4336	return 0;
4337      return COSTS_N_INSNS (2);
4338
4339    case CONST:
4340    case CONST_DOUBLE:
4341    case LABEL_REF:
4342    case SYMBOL_REF:
4343      return COSTS_N_INSNS (3);
4344
4345    case UDIV:
4346    case UMOD:
4347    case DIV:
4348    case MOD:
4349      return 100;
4350
4351    case TRUNCATE:
4352      return 99;
4353
4354    case AND:
4355    case XOR:
4356    case IOR:
4357      /* XXX guess.  */
4358      return 8;
4359
4360    case MEM:
4361      /* XXX another guess.  */
4362      /* Memory costs quite a lot for the first word, but subsequent words
4363	 load at the equivalent of a single insn each.  */
4364      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4365	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4366		 ? 4 : 0));
4367
4368    case IF_THEN_ELSE:
4369      /* XXX a guess.  */
4370      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4371	return 14;
4372      return 2;
4373
4374    case ZERO_EXTEND:
4375      /* XXX still guessing.  */
4376      switch (GET_MODE (XEXP (x, 0)))
4377	{
4378	case QImode:
4379	  return (1 + (mode == DImode ? 4 : 0)
4380		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4381
4382	case HImode:
4383	  return (4 + (mode == DImode ? 4 : 0)
4384		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4385
4386	case SImode:
4387	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4388
4389	default:
4390	  return 99;
4391	}
4392
4393    default:
4394      return 99;
4395    }
4396}
4397
4398
4399/* Worker routine for arm_rtx_costs.  */
4400static inline int
4401arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4402{
4403  enum machine_mode mode = GET_MODE (x);
4404  enum rtx_code subcode;
4405  int extra_cost;
4406
4407  switch (code)
4408    {
4409    case MEM:
4410      /* Memory costs quite a lot for the first word, but subsequent words
4411	 load at the equivalent of a single insn each.  */
4412      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4413	      + (GET_CODE (x) == SYMBOL_REF
4414		 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4415
4416    case DIV:
4417    case MOD:
4418    case UDIV:
4419    case UMOD:
4420      return optimize_size ? COSTS_N_INSNS (2) : 100;
4421
4422    case ROTATE:
4423      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4424	return 4;
4425      /* Fall through */
4426    case ROTATERT:
4427      if (mode != SImode)
4428	return 8;
4429      /* Fall through */
4430    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4431      if (mode == DImode)
4432	return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4433		+ ((GET_CODE (XEXP (x, 0)) == REG
4434		    || (GET_CODE (XEXP (x, 0)) == SUBREG
4435			&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4436		   ? 0 : 8));
4437      return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4438		    || (GET_CODE (XEXP (x, 0)) == SUBREG
4439			&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4440		   ? 0 : 4)
4441	      + ((GET_CODE (XEXP (x, 1)) == REG
4442		  || (GET_CODE (XEXP (x, 1)) == SUBREG
4443		      && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4444		  || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4445		 ? 0 : 4));
4446
4447    case MINUS:
4448      if (mode == DImode)
4449	return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4450		+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
4451		    || (GET_CODE (XEXP (x, 0)) == CONST_INT
4452		       && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4453		   ? 0 : 8));
4454
4455      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4456	return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4457		      || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4458			  && arm_const_double_rtx (XEXP (x, 1))))
4459		     ? 0 : 8)
4460		+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
4461		    || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4462			&& arm_const_double_rtx (XEXP (x, 0))))
4463		   ? 0 : 8));
4464
4465      if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4466	    && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4467	    && REG_OR_SUBREG_REG (XEXP (x, 1))))
4468	  || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4469	       || subcode == ASHIFTRT || subcode == LSHIFTRT
4470	       || subcode == ROTATE || subcode == ROTATERT
4471	       || (subcode == MULT
4472		   && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4473		   && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4474			(INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4475	      && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4476	      && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4477		  || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4478	      && REG_OR_SUBREG_REG (XEXP (x, 0))))
4479	return 1;
4480      /* Fall through */
4481
4482    case PLUS:
4483      if (GET_CODE (XEXP (x, 0)) == MULT)
4484	{
4485	  extra_cost = rtx_cost (XEXP (x, 0), code);
4486	  if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4487	    extra_cost += 4 * ARM_NUM_REGS (mode);
4488	  return extra_cost;
4489	}
4490
4491      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4492	return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4493		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4494		    || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4495			&& arm_const_double_rtx (XEXP (x, 1))))
4496		   ? 0 : 8));
4497
4498      /* Fall through */
4499    case AND: case XOR: case IOR:
4500      extra_cost = 0;
4501
      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && GET_CODE (XEXP (x, 1)) != CONST_INT)
	extra_cost = 4;
4513
4514      if (mode == DImode)
4515	return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4516		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4517		    || (GET_CODE (XEXP (x, 1)) == CONST_INT
4518			&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4519		   ? 0 : 8));
4520
4521      if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4522	return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4523		+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
4524		    || (GET_CODE (XEXP (x, 1)) == CONST_INT
4525			&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4526		   ? 0 : 4));
4527
4528      else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4529	return (1 + extra_cost
4530		+ ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4531		     || subcode == LSHIFTRT || subcode == ASHIFTRT
4532		     || subcode == ROTATE || subcode == ROTATERT
4533		     || (subcode == MULT
4534			 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4535			 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4536			      (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4537		    && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4538		    && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4539			|| GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4540		   ? 0 : 4));
4541
4542      return 8;
4543
4544    case MULT:
4545      /* This should have been handled by the CPU specific routines.  */
4546      gcc_unreachable ();
4547
4548    case TRUNCATE:
4549      if (arm_arch3m && mode == SImode
4550	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4551	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4552	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4553	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4554	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4555	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4556	return 8;
4557      return 99;
4558
4559    case NEG:
4560      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4561	return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4562      /* Fall through */
4563    case NOT:
4564      if (mode == DImode)
4565	return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4566
4567      return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4568
4569    case IF_THEN_ELSE:
4570      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4571	return 14;
4572      return 2;
4573
4574    case COMPARE:
4575      return 1;
4576
4577    case ABS:
4578      return 4 + (mode == DImode ? 4 : 0);
4579
4580    case SIGN_EXTEND:
4581      if (GET_MODE (XEXP (x, 0)) == QImode)
4582	return (4 + (mode == DImode ? 4 : 0)
4583		+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4584      /* Fall through */
4585    case ZERO_EXTEND:
4586      switch (GET_MODE (XEXP (x, 0)))
4587	{
4588	case QImode:
4589	  return (1 + (mode == DImode ? 4 : 0)
4590		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4591
4592	case HImode:
4593	  return (4 + (mode == DImode ? 4 : 0)
4594		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4595
4596	case SImode:
4597	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4598
4599	case V8QImode:
4600	case V4HImode:
4601	case V2SImode:
4602	case V4QImode:
4603	case V2HImode:
4604	    return 1;
4605
4606	default:
4607	  gcc_unreachable ();
4608	}
4609      gcc_unreachable ();
4610
4611    case CONST_INT:
4612      if (const_ok_for_arm (INTVAL (x)))
4613	return outer == SET ? 2 : -1;
4614      else if (outer == AND
4615	       && const_ok_for_arm (~INTVAL (x)))
4616	return -1;
4617      else if ((outer == COMPARE
4618		|| outer == PLUS || outer == MINUS)
4619	       && const_ok_for_arm (-INTVAL (x)))
4620	return -1;
4621      else
4622	return 5;
4623
4624    case CONST:
4625    case LABEL_REF:
4626    case SYMBOL_REF:
4627      return 6;
4628
4629    case CONST_DOUBLE:
4630      if (arm_const_double_rtx (x))
4631	return outer == SET ? 2 : -1;
4632      else if ((outer == COMPARE || outer == PLUS)
4633	       && neg_const_double_rtx_ok_for_fpa (x))
4634	return -1;
4635      return 7;
4636
4637    default:
4638      return 99;
4639    }
4640}
4641
4642/* RTX costs when optimizing for size.  */
4643static bool
4644arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4645{
4646  enum machine_mode mode = GET_MODE (x);
4647
4648  if (TARGET_THUMB)
4649    {
4650      /* XXX TBD.  For now, use the standard costs.  */
4651      *total = thumb_rtx_costs (x, code, outer_code);
4652      return true;
4653    }
4654
4655  switch (code)
4656    {
4657    case MEM:
      /* A memory access costs 1 insn if the address is a single register,
	 otherwise it costs one insn per word.  */
4660      if (REG_P (XEXP (x, 0)))
4661	*total = COSTS_N_INSNS (1);
4662      else
4663	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4664      return true;
4665
4666    case DIV:
4667    case MOD:
4668    case UDIV:
4669    case UMOD:
4670      /* Needs a libcall, so it costs about this.  */
4671      *total = COSTS_N_INSNS (2);
4672      return false;
4673
4674    case ROTATE:
4675      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4676	{
4677	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4678	  return true;
4679	}
4680      /* Fall through */
4681    case ROTATERT:
4682    case ASHIFT:
4683    case LSHIFTRT:
4684    case ASHIFTRT:
4685      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4686	{
4687	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4688	  return true;
4689	}
4690      else if (mode == SImode)
4691	{
4692	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4693	  /* Slightly disparage register shifts, but not by much.  */
4694	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4695	    *total += 1 + rtx_cost (XEXP (x, 1), code);
4696	  return true;
4697	}
4698
4699      /* Needs a libcall.  */
4700      *total = COSTS_N_INSNS (2);
4701      return false;
4702
4703    case MINUS:
4704      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4705	{
4706	  *total = COSTS_N_INSNS (1);
4707	  return false;
4708	}
4709
4710      if (mode == SImode)
4711	{
4712	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4713	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4714
4715	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4716	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4717	      || subcode1 == ROTATE || subcode1 == ROTATERT
4718	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4719	      || subcode1 == ASHIFTRT)
4720	    {
4721	      /* It's just the cost of the two operands.  */
4722	      *total = 0;
4723	      return false;
4724	    }
4725
4726	  *total = COSTS_N_INSNS (1);
4727	  return false;
4728	}
4729
4730      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4731      return false;
4732
4733    case PLUS:
4734      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4735	{
4736	  *total = COSTS_N_INSNS (1);
4737	  return false;
4738	}
4739
4740      /* Fall through */
4741    case AND: case XOR: case IOR:
4742      if (mode == SImode)
4743	{
4744	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4745
4746	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4747	      || subcode == LSHIFTRT || subcode == ASHIFTRT
4748	      || (code == AND && subcode == NOT))
4749	    {
4750	      /* It's just the cost of the two operands.  */
4751	      *total = 0;
4752	      return false;
4753	    }
4754	}
4755
4756      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4757      return false;
4758
4759    case MULT:
4760      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4761      return false;
4762
4763    case NEG:
4764      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4765	*total = COSTS_N_INSNS (1);
4766      /* Fall through */
4767    case NOT:
4768      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4769
4770      return false;
4771
4772    case IF_THEN_ELSE:
4773      *total = 0;
4774      return false;
4775
4776    case COMPARE:
4777      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
4779      else
4780	*total = COSTS_N_INSNS (1);
4781      return false;
4782
4783    case ABS:
4784      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4785	*total = COSTS_N_INSNS (1);
4786      else
4787	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4788      return false;
4789
4790    case SIGN_EXTEND:
4791      *total = 0;
4792      if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4793	{
4794	  if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4795	    *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4796	}
4797      if (mode == DImode)
4798	*total += COSTS_N_INSNS (1);
4799      return false;
4800
4801    case ZERO_EXTEND:
4802      *total = 0;
4803      if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4804	{
4805	  switch (GET_MODE (XEXP (x, 0)))
4806	    {
4807	    case QImode:
4808	      *total += COSTS_N_INSNS (1);
4809	      break;
4810
	    case HImode:
	      *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
	      break;

	    case SImode:
	      break;
4816
4817	    default:
4818	      *total += COSTS_N_INSNS (2);
4819	    }
4820	}
4821
4822      if (mode == DImode)
4823	*total += COSTS_N_INSNS (1);
4824
4825      return false;
4826
4827    case CONST_INT:
4828      if (const_ok_for_arm (INTVAL (x)))
4829	*total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4830      else if (const_ok_for_arm (~INTVAL (x)))
4831	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4832      else if (const_ok_for_arm (-INTVAL (x)))
4833	{
4834	  if (outer_code == COMPARE || outer_code == PLUS
4835	      || outer_code == MINUS)
4836	    *total = 0;
4837	  else
4838	    *total = COSTS_N_INSNS (1);
4839	}
4840      else
4841	*total = COSTS_N_INSNS (2);
4842      return true;
4843
4844    case CONST:
4845    case LABEL_REF:
4846    case SYMBOL_REF:
4847      *total = COSTS_N_INSNS (2);
4848      return true;
4849
4850    case CONST_DOUBLE:
4851      *total = COSTS_N_INSNS (4);
4852      return true;
4853
4854    default:
4855      if (mode != VOIDmode)
4856	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4857      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
4859      return false;
4860    }
4861}
4862
4863/* RTX costs for cores with a slow MUL implementation.  */
4864
4865static bool
4866arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4867{
4868  enum machine_mode mode = GET_MODE (x);
4869
4870  if (TARGET_THUMB)
4871    {
4872      *total = thumb_rtx_costs (x, code, outer_code);
4873      return true;
4874    }
4875
4876  switch (code)
4877    {
4878    case MULT:
4879      if (GET_MODE_CLASS (mode) == MODE_FLOAT
4880	  || mode == DImode)
4881	{
4882	  *total = 30;
4883	  return true;
4884	}
4885
4886      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4887	{
4888	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4889				      & (unsigned HOST_WIDE_INT) 0xffffffff);
4890	  int cost, const_ok = const_ok_for_arm (i);
4891	  int j, booth_unit_size;
4892
4893	  /* Tune as appropriate.  */
4894	  cost = const_ok ? 4 : 8;
4895	  booth_unit_size = 2;
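	  /* The loop charges two units per BOOTH_UNIT_SIZE bits of the
	     constant; e.g. for 0x55 it runs four times, so the total is
	     4 (a const_ok MOV) + 4 * 2 = 12.  */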
4896	  for (j = 0; i && j < 32; j += booth_unit_size)
4897	    {
4898	      i >>= booth_unit_size;
4899	      cost += 2;
4900	    }
4901
4902	  *total = cost;
4903	  return true;
4904	}
4905
4906      *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4907	          + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4908      return true;
4909
4910    default:
4911      *total = arm_rtx_costs_1 (x, code, outer_code);
4912      return true;
4913    }
4914}
4915
4916
4917/* RTX cost for cores with a fast multiply unit (M variants).  */
4918
4919static bool
4920arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4921{
4922  enum machine_mode mode = GET_MODE (x);
4923
4924  if (TARGET_THUMB)
4925    {
4926      *total = thumb_rtx_costs (x, code, outer_code);
4927      return true;
4928    }
4929
4930  switch (code)
4931    {
4932    case MULT:
4933      /* There is no point basing this on the tuning, since it is always the
4934	 fast variant if it exists at all.  */
4935      if (mode == DImode
4936	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4937	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4938	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4939	{
4940	  *total = 8;
4941	  return true;
4942	}
4943
4944
4945      if (GET_MODE_CLASS (mode) == MODE_FLOAT
4946	  || mode == DImode)
4947	{
4948	  *total = 30;
4949	  return true;
4950	}
4951
4952      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4953	{
4954	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4955				      & (unsigned HOST_WIDE_INT) 0xffffffff);
4956	  int cost, const_ok = const_ok_for_arm (i);
4957	  int j, booth_unit_size;
4958
4959	  /* Tune as appropriate.  */
4960	  cost = const_ok ? 4 : 8;
4961	  booth_unit_size = 8;
4962	  for (j = 0; i && j < 32; j += booth_unit_size)
4963	    {
4964	      i >>= booth_unit_size;
4965	      cost += 2;
4966	    }
4967
4968	  *total = cost;
4969	  return true;
4970	}
4971
4972      *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4973	         + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4974      return true;
4975
4976    default:
4977      *total = arm_rtx_costs_1 (x, code, outer_code);
4978      return true;
4979    }
4980}
4981
4982
4983/* RTX cost for XScale CPUs.  */
4984
4985static bool
4986arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4987{
4988  enum machine_mode mode = GET_MODE (x);
4989
4990  if (TARGET_THUMB)
4991    {
4992      *total = thumb_rtx_costs (x, code, outer_code);
4993      return true;
4994    }
4995
4996  switch (code)
4997    {
4998    case MULT:
4999      /* There is no point basing this on the tuning, since it is always the
5000	 fast variant if it exists at all.  */
5001      if (mode == DImode
5002	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5003	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5004	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5005	{
5006	  *total = 8;
5007	  return true;
5008	}
5009
5010
5011      if (GET_MODE_CLASS (mode) == MODE_FLOAT
5012	  || mode == DImode)
5013	{
5014	  *total = 30;
5015	  return true;
5016	}
5017
5018      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5019	{
5020	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5021				      & (unsigned HOST_WIDE_INT) 0xffffffff);
5022	  int cost, const_ok = const_ok_for_arm (i);
5023	  unsigned HOST_WIDE_INT masked_const;
5024
5025	  /* The cost will be related to two insns.
5026	     First a load of the constant (MOV or LDR), then a multiply.  */
5027	  cost = 2;
5028	  if (! const_ok)
5029	    cost += 1;      /* LDR is probably more expensive because
5030			       of longer result latency.  */
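	  /* The checks below approximate XScale's early-terminating
	     multiplier: a constant that fits in 16 signed bits adds no
	     extra latency, one that fits in 28 signed bits adds one cycle,
	     and anything wider adds two.  */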
5031	  masked_const = i & 0xffff8000;
5032	  if (masked_const != 0 && masked_const != 0xffff8000)
5033	    {
5034	      masked_const = i & 0xf8000000;
5035	      if (masked_const == 0 || masked_const == 0xf8000000)
5036		cost += 1;
5037	      else
5038		cost += 2;
5039	    }
5040	  *total = cost;
5041	  return true;
5042	}
5043
5044      *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5045		 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5046      return true;
5047
5048    case COMPARE:
5049      /* A COMPARE of a MULT is slow on XScale; the muls instruction
5050	 will stall until the multiplication is complete.  */
5051      if (GET_CODE (XEXP (x, 0)) == MULT)
5052	*total = 4 + rtx_cost (XEXP (x, 0), code);
5053      else
5054	*total = arm_rtx_costs_1 (x, code, outer_code);
5055      return true;
5056
5057    default:
5058      *total = arm_rtx_costs_1 (x, code, outer_code);
5059      return true;
5060    }
5061}
5062
5063
5064/* RTX costs for 9e (and later) cores.  */
5065
5066static bool
5067arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5068{
5069  enum machine_mode mode = GET_MODE (x);
5070  int nonreg_cost;
5071  int cost;
5072
5073  if (TARGET_THUMB)
5074    {
5075      switch (code)
5076	{
5077	case MULT:
5078	  *total = COSTS_N_INSNS (3);
5079	  return true;
5080
5081	default:
5082	  *total = thumb_rtx_costs (x, code, outer_code);
5083	  return true;
5084	}
5085    }
5086
5087  switch (code)
5088    {
5089    case MULT:
5090      /* There is no point basing this on the tuning, since it is always the
5091	 fast variant if it exists at all.  */
5092      if (mode == DImode
5093	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5094	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5095	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5096	{
5097	  *total = 3;
5098	  return true;
5099	}
5100
5101
5102      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5103	{
5104	  *total = 30;
5105	  return true;
5106	}
5107      if (mode == DImode)
5108	{
5109	  cost = 7;
5110	  nonreg_cost = 8;
5111	}
5112      else
5113	{
5114	  cost = 2;
5115	  nonreg_cost = 4;
5116	}
5117
5118
5119      *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5120		    + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5121      return true;
5122
5123    default:
5124      *total = arm_rtx_costs_1 (x, code, outer_code);
5125      return true;
5126    }
5127}

/* All address computations are essentially free, but rtx_cost returns
   nearly the same value for all of them.  So we weight the different types
   of address here in order of preference (most preferred first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
5132static inline int
5133arm_arm_address_cost (rtx x)
5134{
5135  enum rtx_code c  = GET_CODE (x);
5136
5137  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5138    return 0;
5139  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5140    return 10;
5141
5142  if (c == PLUS || c == MINUS)
5143    {
5144      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5145	return 2;
5146
5147      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5148	return 3;
5149
5150      return 4;
5151    }
5152
5153  return 6;
5154}
5155
5156static inline int
5157arm_thumb_address_cost (rtx x)
5158{
5159  enum rtx_code c  = GET_CODE (x);
5160
5161  if (c == REG)
5162    return 1;
5163  if (c == PLUS
5164      && GET_CODE (XEXP (x, 0)) == REG
5165      && GET_CODE (XEXP (x, 1)) == CONST_INT)
5166    return 1;
5167
5168  return 2;
5169}
5170
5171static int
5172arm_address_cost (rtx x)
5173{
5174  return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5175}
5176
5177static int
5178arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5179{
5180  rtx i_pat, d_pat;
5181
5182  /* Some true dependencies can have a higher cost depending
5183     on precisely how certain input operands are used.  */
5184  if (arm_tune_xscale
5185      && REG_NOTE_KIND (link) == 0
5186      && recog_memoized (insn) >= 0
5187      && recog_memoized (dep) >= 0)
5188    {
5189      int shift_opnum = get_attr_shift (insn);
5190      enum attr_type attr_type = get_attr_type (dep);
5191
5192      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5193	 operand for INSN.  If we have a shifted input operand and the
5194	 instruction we depend on is another ALU instruction, then we may
5195	 have to account for an additional stall.  */
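      /* For example (an illustrative sequence, not from any ARM manual):

		add	r1, r2, r3, asr #4
		orr	r0, r5, r1, lsl #2

	 Here the ORR has a shifted input operand (r1) that is written by
	 the ADD, itself an ALU instruction with a shifted operand, so on
	 XScale the code below raises the cost of the dependency to 2.  */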
5196      if (shift_opnum != 0
5197	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5198	{
5199	  rtx shifted_operand;
5200	  int opno;
5201
5202	  /* Get the shifted operand.  */
5203	  extract_insn (insn);
5204	  shifted_operand = recog_data.operand[shift_opnum];
5205
	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
5209	  extract_insn (dep);
5210	  preprocess_constraints ();
5211	  for (opno = 0; opno < recog_data.n_operands; opno++)
5212	    {
5213	      /* We can ignore strict inputs.  */
5214	      if (recog_data.operand_type[opno] == OP_IN)
5215		continue;
5216
5217	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
5218					   shifted_operand))
5219		return 2;
5220	    }
5221	}
5222    }
5223
5224  /* XXX This is not strictly true for the FPA.  */
5225  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5226      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5227    return 0;
5228
5229  /* Call insns don't incur a stall, even if they follow a load.  */
5230  if (REG_NOTE_KIND (link) == 0
5231      && GET_CODE (insn) == CALL_INSN)
5232    return 1;
5233
5234  if ((i_pat = single_set (insn)) != NULL
5235      && GET_CODE (SET_SRC (i_pat)) == MEM
5236      && (d_pat = single_set (dep)) != NULL
5237      && GET_CODE (SET_DEST (d_pat)) == MEM)
5238    {
5239      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store; there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
5244
5245      if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5246	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
5247	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
5248	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5249	return 1;
5250    }
5251
5252  return cost;
5253}
5254
5255static int fp_consts_inited = 0;
5256
5257/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
5258static const char * const strings_fp[8] =
5259{
5260  "0",   "1",   "2",   "3",
5261  "4",   "5",   "0.5", "10"
5262};
5263
5264static REAL_VALUE_TYPE values_fp[8];
5265
5266static void
5267init_fp_table (void)
5268{
5269  int i;
5270  REAL_VALUE_TYPE r;
5271
5272  if (TARGET_VFP)
5273    fp_consts_inited = 1;
5274  else
5275    fp_consts_inited = 8;
5276
5277  for (i = 0; i < fp_consts_inited; i++)
5278    {
5279      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5280      values_fp[i] = r;
5281    }
5282}
5283
5284/* Return TRUE if rtx X is a valid immediate FP constant.  */
5285int
5286arm_const_double_rtx (rtx x)
5287{
5288  REAL_VALUE_TYPE r;
5289  int i;
5290
5291  if (!fp_consts_inited)
5292    init_fp_table ();
5293
5294  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5295  if (REAL_VALUE_MINUS_ZERO (r))
5296    return 0;
5297
5298  for (i = 0; i < fp_consts_inited; i++)
5299    if (REAL_VALUES_EQUAL (r, values_fp[i]))
5300      return 1;
5301
5302  return 0;
5303}
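
/* An illustrative sketch (a hypothetical helper, not part of GCC):
   2.0 appears in the table above, so it is a legal FPA immediate;
   under VFP only the "0" entry is initialized, so the test fails.  */

static void ATTRIBUTE_UNUSED
arm_fp_const_example (void)
{
  REAL_VALUE_TYPE two = REAL_VALUE_ATOF ("2", DFmode);
  rtx x = CONST_DOUBLE_FROM_REAL_VALUE (two, DFmode);
  int ok ATTRIBUTE_UNUSED = arm_const_double_rtx (x);

  /* OK is nonzero when compiling for FPA, zero when compiling for VFP.  */
}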
5304
5305/* Return TRUE if rtx X is a valid immediate FPA constant.  */
5306int
5307neg_const_double_rtx_ok_for_fpa (rtx x)
5308{
5309  REAL_VALUE_TYPE r;
5310  int i;
5311
5312  if (!fp_consts_inited)
5313    init_fp_table ();
5314
5315  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5316  r = REAL_VALUE_NEGATE (r);
5317  if (REAL_VALUE_MINUS_ZERO (r))
5318    return 0;
5319
5320  for (i = 0; i < 8; i++)
5321    if (REAL_VALUES_EQUAL (r, values_fp[i]))
5322      return 1;
5323
5324  return 0;
5325}
5326
5327/* Predicates for `match_operand' and `match_operator'.  */
5328
5329/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
5330int
5331cirrus_memory_offset (rtx op)
5332{
5333  /* Reject eliminable registers.  */
5334  if (! (reload_in_progress || reload_completed)
5335      && (   reg_mentioned_p (frame_pointer_rtx, op)
5336	  || reg_mentioned_p (arg_pointer_rtx, op)
5337	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
5338	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
5339	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
5340	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
5341    return 0;
5342
5343  if (GET_CODE (op) == MEM)
5344    {
5345      rtx ind;
5346
5347      ind = XEXP (op, 0);
5348
5349      /* Match: (mem (reg)).  */
5350      if (GET_CODE (ind) == REG)
5351	return 1;
5352
5353      /* Match:
5354	 (mem (plus (reg)
5355	            (const))).  */
5356      if (GET_CODE (ind) == PLUS
5357	  && GET_CODE (XEXP (ind, 0)) == REG
5358	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5359	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
5360	return 1;
5361    }
5362
5363  return 0;
5364}
5365
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if writeback address modes are allowed.  */
5368
5369int
5370arm_coproc_mem_operand (rtx op, bool wb)
5371{
5372  rtx ind;
5373
5374  /* Reject eliminable registers.  */
5375  if (! (reload_in_progress || reload_completed)
5376      && (   reg_mentioned_p (frame_pointer_rtx, op)
5377	  || reg_mentioned_p (arg_pointer_rtx, op)
5378	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
5379	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
5380	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
5381	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
5382    return FALSE;
5383
5384  /* Constants are converted into offsets from labels.  */
5385  if (GET_CODE (op) != MEM)
5386    return FALSE;
5387
5388  ind = XEXP (op, 0);
5389
5390  if (reload_completed
5391      && (GET_CODE (ind) == LABEL_REF
5392	  || (GET_CODE (ind) == CONST
5393	      && GET_CODE (XEXP (ind, 0)) == PLUS
5394	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
5395	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
5396    return TRUE;
5397
5398  /* Match: (mem (reg)).  */
5399  if (GET_CODE (ind) == REG)
5400    return arm_address_register_rtx_p (ind, 0);
5401
  /* Autoincrement addressing modes.  */
5403  if (wb
5404      && (GET_CODE (ind) == PRE_INC
5405	  || GET_CODE (ind) == POST_INC
5406	  || GET_CODE (ind) == PRE_DEC
5407	  || GET_CODE (ind) == POST_DEC))
5408    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
5409
5410  if (wb
5411      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
5412      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
5413      && GET_CODE (XEXP (ind, 1)) == PLUS
5414      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
5415    ind = XEXP (ind, 1);
5416
5417  /* Match:
5418     (plus (reg)
5419	   (const)).  */
5420  if (GET_CODE (ind) == PLUS
5421      && GET_CODE (XEXP (ind, 0)) == REG
5422      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5423      && GET_CODE (XEXP (ind, 1)) == CONST_INT
5424      && INTVAL (XEXP (ind, 1)) > -1024
5425      && INTVAL (XEXP (ind, 1)) <  1024
5426      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
5427    return TRUE;
5428
5429  return FALSE;
5430}
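
/* A hypothetical illustration (not part of GCC) of the offset range
   accepted above: a word-aligned offset of 1020 fits the coprocessor
   load/store encoding, whereas 1024 is out of range.  */

static void ATTRIBUTE_UNUSED
arm_coproc_mem_example (void)
{
  rtx base = gen_rtx_REG (SImode, 4);
  rtx in_range = gen_rtx_MEM (DFmode, plus_constant (base, 1020));
  rtx out_of_range = gen_rtx_MEM (DFmode, plus_constant (base, 1024));

  gcc_assert (arm_coproc_mem_operand (in_range, FALSE));
  gcc_assert (!arm_coproc_mem_operand (out_of_range, FALSE));
}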
5431
5432/* Return true if X is a register that will be eliminated later on.  */
5433int
5434arm_eliminable_register (rtx x)
5435{
5436  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5437		       || REGNO (x) == ARG_POINTER_REGNUM
5438		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5439			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5440}
5441
/* Return GENERAL_REGS if a scratch register is required to reload X
   to/from coprocessor registers; otherwise return NO_REGS.  */
5444
5445enum reg_class
5446coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
5447{
5448  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
5449    return NO_REGS;
5450
5451  return GENERAL_REGS;
5452}
5453
5454/* Values which must be returned in the most-significant end of the return
5455   register.  */
5456
5457static bool
5458arm_return_in_msb (tree valtype)
5459{
5460  return (TARGET_AAPCS_BASED
5461          && BYTES_BIG_ENDIAN
5462          && (AGGREGATE_TYPE_P (valtype)
5463              || TREE_CODE (valtype) == COMPLEX_TYPE));
5464}
5465
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code, which has to work around
   a hardware bug triggered by such instructions.  */
5469static bool
5470arm_memory_load_p (rtx insn)
5471{
  rtx body, lhs, rhs;
5473
5474  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5475    return false;
5476
5477  body = PATTERN (insn);
5478
5479  if (GET_CODE (body) != SET)
5480    return false;
5481
5482  lhs = XEXP (body, 0);
5483  rhs = XEXP (body, 1);
5484
5485  lhs = REG_OR_SUBREG_RTX (lhs);
5486
5487  /* If the destination is not a general purpose
5488     register we do not have to worry.  */
5489  if (GET_CODE (lhs) != REG
5490      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5491    return false;
5492
5493  /* As well as loads from memory we also have to react
5494     to loads of invalid constants which will be turned
5495     into loads from the minipool.  */
5496  return (GET_CODE (rhs) == MEM
5497	  || GET_CODE (rhs) == SYMBOL_REF
5498	  || note_invalid_constants (insn, -1, false));
5499}
5500
5501/* Return TRUE if INSN is a Cirrus instruction.  */
5502static bool
5503arm_cirrus_insn_p (rtx insn)
5504{
5505  enum attr_cirrus attr;
5506
5507  /* get_attr cannot accept USE or CLOBBER.  */
5508  if (!insn
5509      || GET_CODE (insn) != INSN
5510      || GET_CODE (PATTERN (insn)) == USE
5511      || GET_CODE (PATTERN (insn)) == CLOBBER)
5512    return 0;
5513
5514  attr = get_attr_cirrus (insn);
5515
5516  return attr != CIRRUS_NOT;
5517}
5518
5519/* Cirrus reorg for invalid instruction combinations.  */
5520static void
5521cirrus_reorg (rtx first)
5522{
5523  enum attr_cirrus attr;
5524  rtx body = PATTERN (first);
5525  rtx t;
5526  int nops;
5527
  /* Any branch must be followed by 2 non-Cirrus instructions.  */
5529  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5530    {
5531      nops = 0;
5532      t = next_nonnote_insn (first);
5533
5534      if (arm_cirrus_insn_p (t))
5535	++ nops;
5536
5537      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5538	++ nops;
5539
5540      while (nops --)
5541	emit_insn_after (gen_nop (), first);
5542
5543      return;
5544    }
5545
5546  /* (float (blah)) is in parallel with a clobber.  */
5547  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5548    body = XVECEXP (body, 0, 0);
5549
5550  if (GET_CODE (body) == SET)
5551    {
5552      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5553
      /* cfldrd, cfldr64, cfstrd, cfstr64 must
	 be followed by a non-Cirrus insn.  */
5556      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5557	{
5558	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5559	    emit_insn_after (gen_nop (), first);
5560
5561	  return;
5562	}
5563      else if (arm_memory_load_p (first))
5564	{
5565	  unsigned int arm_regno;
5566
	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
	     ldr/cfmv64hr combination where the Rd field is the same
	     in both instructions must be split with a non-Cirrus
	     insn.  Example:

	     ldr r0, blah
	     nop
	     cfmvsr mvf0, r0.  */
5575
5576	  /* Get Arm register number for ldr insn.  */
5577	  if (GET_CODE (lhs) == REG)
5578	    arm_regno = REGNO (lhs);
5579	  else
5580	    {
5581	      gcc_assert (GET_CODE (rhs) == REG);
5582	      arm_regno = REGNO (rhs);
5583	    }
5584
5585	  /* Next insn.  */
5586	  first = next_nonnote_insn (first);
5587
5588	  if (! arm_cirrus_insn_p (first))
5589	    return;
5590
5591	  body = PATTERN (first);
5592
5593          /* (float (blah)) is in parallel with a clobber.  */
5594          if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5595	    body = XVECEXP (body, 0, 0);
5596
5597	  if (GET_CODE (body) == FLOAT)
5598	    body = XEXP (body, 0);
5599
5600	  if (get_attr_cirrus (first) == CIRRUS_MOVE
5601	      && GET_CODE (XEXP (body, 1)) == REG
5602	      && arm_regno == REGNO (XEXP (body, 1)))
5603	    emit_insn_after (gen_nop (), first);
5604
5605	  return;
5606	}
5607    }
5608
5609  /* get_attr cannot accept USE or CLOBBER.  */
5610  if (!first
5611      || GET_CODE (first) != INSN
5612      || GET_CODE (PATTERN (first)) == USE
5613      || GET_CODE (PATTERN (first)) == CLOBBER)
5614    return;
5615
5616  attr = get_attr_cirrus (first);
5617
5618  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5619     must be followed by a non-coprocessor instruction.  */
5620  if (attr == CIRRUS_COMPARE)
5621    {
5622      nops = 0;
5623
5624      t = next_nonnote_insn (first);
5625
5626      if (arm_cirrus_insn_p (t))
5627	++ nops;
5628
5629      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5630	++ nops;
5631
5632      while (nops --)
5633	emit_insn_after (gen_nop (), first);
5634
5635      return;
5636    }
5637}
5638
5639/* Return TRUE if X references a SYMBOL_REF.  */
5640int
5641symbol_mentioned_p (rtx x)
5642{
5643  const char * fmt;
5644  int i;
5645
5646  if (GET_CODE (x) == SYMBOL_REF)
5647    return 1;
5648
5649  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
5650     are constant offsets, not symbols.  */
5651  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5652    return 0;
5653
5654  fmt = GET_RTX_FORMAT (GET_CODE (x));
5655
5656  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5657    {
5658      if (fmt[i] == 'E')
5659	{
5660	  int j;
5661
5662	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5663	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
5664	      return 1;
5665	}
5666      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5667	return 1;
5668    }
5669
5670  return 0;
5671}
5672
5673/* Return TRUE if X references a LABEL_REF.  */
5674int
5675label_mentioned_p (rtx x)
5676{
5677  const char * fmt;
5678  int i;
5679
5680  if (GET_CODE (x) == LABEL_REF)
5681    return 1;
5682
5683  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
5684     instruction, but they are constant offsets, not symbols.  */
5685  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5686    return 0;
5687
5688  fmt = GET_RTX_FORMAT (GET_CODE (x));
5689  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5690    {
5691      if (fmt[i] == 'E')
5692	{
5693	  int j;
5694
5695	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5696	    if (label_mentioned_p (XVECEXP (x, i, j)))
5697	      return 1;
5698	}
5699      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5700	return 1;
5701    }
5702
5703  return 0;
5704}
5705
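/* Return TRUE if X is an UNSPEC_TLS, possibly enclosed in a CONST.  */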
5706int
5707tls_mentioned_p (rtx x)
5708{
5709  switch (GET_CODE (x))
5710    {
5711    case CONST:
5712      return tls_mentioned_p (XEXP (x, 0));
5713
5714    case UNSPEC:
5715      if (XINT (x, 1) == UNSPEC_TLS)
5716	return 1;
5717
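      /* Fall through.  */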
5718    default:
5719      return 0;
5720    }
5721}
5722
5723/* Must not copy a SET whose source operand is PC-relative.  */
5724
5725static bool
5726arm_cannot_copy_insn_p (rtx insn)
5727{
5728  rtx pat = PATTERN (insn);
5729
5730  if (GET_CODE (pat) == PARALLEL
5731      && GET_CODE (XVECEXP (pat, 0, 0)) == SET)
5732    {
5733      rtx rhs = SET_SRC (XVECEXP (pat, 0, 0));
5734
5735      if (GET_CODE (rhs) == UNSPEC
5736	  && XINT (rhs, 1) == UNSPEC_PIC_BASE)
5737	return TRUE;
5738
5739      if (GET_CODE (rhs) == MEM
5740	  && GET_CODE (XEXP (rhs, 0)) == UNSPEC
5741	  && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
5742	return TRUE;
5743    }
5744
5745  return FALSE;
5746}
5747
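/* Return the comparison code under which the first operand of the
   MIN/MAX expression X is selected as the result.  */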
5748enum rtx_code
5749minmax_code (rtx x)
5750{
5751  enum rtx_code code = GET_CODE (x);
5752
5753  switch (code)
5754    {
5755    case SMAX:
5756      return GE;
5757    case SMIN:
5758      return LE;
5759    case UMIN:
5760      return LEU;
5761    case UMAX:
5762      return GEU;
5763    default:
5764      gcc_unreachable ();
5765    }
5766}
5767
5768/* Return 1 if memory locations are adjacent.  */
5769int
5770adjacent_mem_locations (rtx a, rtx b)
5771{
5772  /* We don't guarantee to preserve the order of these memory refs.  */
5773  if (volatile_refs_p (a) || volatile_refs_p (b))
5774    return 0;
5775
5776  if ((GET_CODE (XEXP (a, 0)) == REG
5777       || (GET_CODE (XEXP (a, 0)) == PLUS
5778	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5779      && (GET_CODE (XEXP (b, 0)) == REG
5780	  || (GET_CODE (XEXP (b, 0)) == PLUS
5781	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5782    {
5783      HOST_WIDE_INT val0 = 0, val1 = 0;
5784      rtx reg0, reg1;
5785      int val_diff;
5786
5787      if (GET_CODE (XEXP (a, 0)) == PLUS)
5788        {
5789	  reg0 = XEXP (XEXP (a, 0), 0);
5790	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5791        }
5792      else
5793	reg0 = XEXP (a, 0);
5794
5795      if (GET_CODE (XEXP (b, 0)) == PLUS)
5796        {
5797	  reg1 = XEXP (XEXP (b, 0), 0);
5798	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5799        }
5800      else
5801	reg1 = XEXP (b, 0);
5802
5803      /* Don't accept any offset that will require multiple
5804	 instructions to handle, since this would cause the
5805	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
5807	return 0;
5808
5809      /* Don't allow an eliminable register: register elimination can make
5810	 the offset too large.  */
5811      if (arm_eliminable_register (reg0))
5812	return 0;
5813
5814      val_diff = val1 - val0;
5815
5816      if (arm_ld_sched)
5817	{
5818	  /* If the target has load delay slots, then there's no benefit
5819	     to using an ldm instruction unless the offset is zero and
5820	     we are optimizing for size.  */
5821	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5822		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5823		  && (val_diff == 4 || val_diff == -4));
5824	}
5825
5826      return ((REGNO (reg0) == REGNO (reg1))
5827	      && (val_diff == 4 || val_diff == -4));
5828    }
5829
5830  return 0;
5831}
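
/* A hypothetical illustration (not part of GCC): two word loads from
   [r3, #4] and [r3, #8] share a base register and their offsets differ
   by exactly 4, so they are candidates for combination into a single
   load-multiple.  */

static void ATTRIBUTE_UNUSED
adjacent_mem_example (void)
{
  rtx base = gen_rtx_REG (SImode, 3);
  rtx a = gen_rtx_MEM (SImode, plus_constant (base, 4));
  rtx b = gen_rtx_MEM (SImode, plus_constant (base, 8));
  int ok ATTRIBUTE_UNUSED = adjacent_mem_locations (a, b);

  /* OK is nonzero here, except on arm_ld_sched cores, which apply the
     stricter optimize_size test above.  */
}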
5832
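/* Analyze a sequence of NOPS register loads whose memory operands are
   OPERANDS[NOPS..2*NOPS-1].  Return 1-4 for an ldmia, ldmib, ldmda or
   ldmdb sequence respectively, 5 if the base address must first be
   adjusted by a separate add or sub, or 0 if the operands are not
   suitable.  REGS, BASE and LOAD_OFFSET return the sorted register
   list, the base register and the lowest offset.  */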
5833int
5834load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5835			HOST_WIDE_INT *load_offset)
5836{
5837  int unsorted_regs[4];
5838  HOST_WIDE_INT unsorted_offsets[4];
5839  int order[4];
5840  int base_reg = -1;
5841  int i;
5842
5843  /* Can only handle 2, 3, or 4 insns at present,
5844     though could be easily extended if required.  */
5845  gcc_assert (nops >= 2 && nops <= 4);
5846
5847  /* Loop over the operands and check that the memory references are
5848     suitable (i.e. immediate offsets from the same base register).  At
5849     the same time, extract the target register, and the memory
5850     offsets.  */
5851  for (i = 0; i < nops; i++)
5852    {
5853      rtx reg;
5854      rtx offset;
5855
5856      /* Convert a subreg of a mem into the mem itself.  */
5857      if (GET_CODE (operands[nops + i]) == SUBREG)
5858	operands[nops + i] = alter_subreg (operands + (nops + i));
5859
5860      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5861
5862      /* Don't reorder volatile memory references; it doesn't seem worth
5863	 looking for the case where the order is ok anyway.  */
5864      if (MEM_VOLATILE_P (operands[nops + i]))
5865	return 0;
5866
5867      offset = const0_rtx;
5868
5869      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5870	   || (GET_CODE (reg) == SUBREG
5871	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5872	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5873	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5874		   == REG)
5875		  || (GET_CODE (reg) == SUBREG
5876		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5877	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5878		  == CONST_INT)))
5879	{
5880	  if (i == 0)
5881	    {
5882	      base_reg = REGNO (reg);
5883	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5884				  ? REGNO (operands[i])
5885				  : REGNO (SUBREG_REG (operands[i])));
5886	      order[0] = 0;
5887	    }
5888	  else
5889	    {
5890	      if (base_reg != (int) REGNO (reg))
5891		/* Not addressed from the same base register.  */
5892		return 0;
5893
5894	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5895				  ? REGNO (operands[i])
5896				  : REGNO (SUBREG_REG (operands[i])));
5897	      if (unsorted_regs[i] < unsorted_regs[order[0]])
5898		order[0] = i;
5899	    }
5900
5901	  /* If it isn't an integer register, or if it overwrites the
5902	     base register but isn't the last insn in the list, then
5903	     we can't do this.  */
5904	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5905	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
5906	    return 0;
5907
5908	  unsorted_offsets[i] = INTVAL (offset);
5909	}
5910      else
5911	/* Not a suitable memory address.  */
5912	return 0;
5913    }
5914
5915  /* All the useful information has now been extracted from the
5916     operands into unsorted_regs and unsorted_offsets; additionally,
5917     order[0] has been set to the lowest numbered register in the
5918     list.  Sort the registers into order, and check that the memory
5919     offsets are ascending and adjacent.  */
5920
5921  for (i = 1; i < nops; i++)
5922    {
5923      int j;
5924
5925      order[i] = order[i - 1];
5926      for (j = 0; j < nops; j++)
5927	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5928	    && (order[i] == order[i - 1]
5929		|| unsorted_regs[j] < unsorted_regs[order[i]]))
5930	  order[i] = j;
5931
      /* Have we found a suitable register?  If not, one must be used more
	 than once.  */
5934      if (order[i] == order[i - 1])
5935	return 0;
5936
5937      /* Is the memory address adjacent and ascending? */
5938      if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5939	return 0;
5940    }
5941
5942  if (base)
5943    {
5944      *base = base_reg;
5945
5946      for (i = 0; i < nops; i++)
5947	regs[i] = unsorted_regs[order[i]];
5948
5949      *load_offset = unsorted_offsets[order[0]];
5950    }
5951
5952  if (unsorted_offsets[order[0]] == 0)
5953    return 1; /* ldmia */
5954
5955  if (unsorted_offsets[order[0]] == 4)
5956    return 2; /* ldmib */
5957
5958  if (unsorted_offsets[order[nops - 1]] == 0)
5959    return 3; /* ldmda */
5960
5961  if (unsorted_offsets[order[nops - 1]] == -4)
5962    return 4; /* ldmdb */
5963
  /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
5965     if the offset isn't small enough.  The reason 2 ldrs are faster
5966     is because these ARMs are able to do more than one cache access
5967     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
5968     whilst the ARM8 has a double bandwidth cache.  This means that
5969     these cores can do both an instruction fetch and a data fetch in
5970     a single cycle, so the trick of calculating the address into a
5971     scratch register (one of the result regs) and then doing a load
5972     multiple actually becomes slower (and no smaller in code size).
5973     That is the transformation
5974
5975 	ldr	rd1, [rbase + offset]
5976 	ldr	rd2, [rbase + offset + 4]
5977
5978     to
5979
5980 	add	rd1, rbase, offset
5981 	ldmia	rd1, {rd1, rd2}
5982
5983     produces worse code -- '3 cycles + any stalls on rd2' instead of
5984     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
5985     access per cycle, the first sequence could never complete in less
5986     than 6 cycles, whereas the ldm sequence would only take 5 and
5987     would make better use of sequential accesses if not hitting the
5988     cache.
5989
5990     We cheat here and test 'arm_ld_sched' which we currently know to
5991     only be true for the ARM8, ARM9 and StrongARM.  If this ever
5992     changes, then the test below needs to be reworked.  */
5993  if (nops == 2 && arm_ld_sched)
5994    return 0;
5995
  /* Can't do it without setting up the offset; only do this if it takes
     no more than one insn.  */
5998  return (const_ok_for_arm (unsorted_offsets[order[0]])
5999	  || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
6000}
6001
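/* Output the assembler for the load-multiple described by OPERANDS,
   as analyzed by load_multiple_sequence above; for return value 5 an
   add or sub of the base address is emitted first.  */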
6002const char *
6003emit_ldm_seq (rtx *operands, int nops)
6004{
6005  int regs[4];
6006  int base_reg;
6007  HOST_WIDE_INT offset;
6008  char buf[100];
6009  int i;
6010
6011  switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
6012    {
6013    case 1:
6014      strcpy (buf, "ldm%?ia\t");
6015      break;
6016
6017    case 2:
6018      strcpy (buf, "ldm%?ib\t");
6019      break;
6020
6021    case 3:
6022      strcpy (buf, "ldm%?da\t");
6023      break;
6024
6025    case 4:
6026      strcpy (buf, "ldm%?db\t");
6027      break;
6028
6029    case 5:
6030      if (offset >= 0)
6031	sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
6032		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
6033		 (long) offset);
6034      else
6035	sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
6036		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
6037		 (long) -offset);
6038      output_asm_insn (buf, operands);
6039      base_reg = regs[0];
6040      strcpy (buf, "ldm%?ia\t");
6041      break;
6042
6043    default:
6044      gcc_unreachable ();
6045    }
6046
6047  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
6048	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
6049
6050  for (i = 1; i < nops; i++)
6051    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
6052	     reg_names[regs[i]]);
6053
6054  strcat (buf, "}\t%@ phole ldm");
6055
6056  output_asm_insn (buf, operands);
6057  return "";
6058}
6059
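/* As load_multiple_sequence above, but for stores; return 1-4 for an
   stmia, stmib, stmda or stmdb sequence, or 0 if the operands are not
   suitable.  */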
6060int
6061store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6062			 HOST_WIDE_INT * load_offset)
6063{
6064  int unsorted_regs[4];
6065  HOST_WIDE_INT unsorted_offsets[4];
6066  int order[4];
6067  int base_reg = -1;
6068  int i;
6069
6070  /* Can only handle 2, 3, or 4 insns at present, though could be easily
6071     extended if required.  */
6072  gcc_assert (nops >= 2 && nops <= 4);
6073
6074  /* Loop over the operands and check that the memory references are
6075     suitable (i.e. immediate offsets from the same base register).  At
6076     the same time, extract the target register, and the memory
6077     offsets.  */
6078  for (i = 0; i < nops; i++)
6079    {
6080      rtx reg;
6081      rtx offset;
6082
6083      /* Convert a subreg of a mem into the mem itself.  */
6084      if (GET_CODE (operands[nops + i]) == SUBREG)
6085	operands[nops + i] = alter_subreg (operands + (nops + i));
6086
6087      gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6088
6089      /* Don't reorder volatile memory references; it doesn't seem worth
6090	 looking for the case where the order is ok anyway.  */
6091      if (MEM_VOLATILE_P (operands[nops + i]))
6092	return 0;
6093
6094      offset = const0_rtx;
6095
6096      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6097	   || (GET_CODE (reg) == SUBREG
6098	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6099	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6100	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6101		   == REG)
6102		  || (GET_CODE (reg) == SUBREG
6103		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6104	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6105		  == CONST_INT)))
6106	{
6107	  if (i == 0)
6108	    {
6109	      base_reg = REGNO (reg);
6110	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6111				  ? REGNO (operands[i])
6112				  : REGNO (SUBREG_REG (operands[i])));
6113	      order[0] = 0;
6114	    }
6115	  else
6116	    {
6117	      if (base_reg != (int) REGNO (reg))
6118		/* Not addressed from the same base register.  */
6119		return 0;
6120
6121	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6122				  ? REGNO (operands[i])
6123				  : REGNO (SUBREG_REG (operands[i])));
6124	      if (unsorted_regs[i] < unsorted_regs[order[0]])
6125		order[0] = i;
6126	    }
6127
6128	  /* If it isn't an integer register, then we can't do this.  */
6129	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
6130	    return 0;
6131
6132	  unsorted_offsets[i] = INTVAL (offset);
6133	}
6134      else
6135	/* Not a suitable memory address.  */
6136	return 0;
6137    }
6138
6139  /* All the useful information has now been extracted from the
6140     operands into unsorted_regs and unsorted_offsets; additionally,
6141     order[0] has been set to the lowest numbered register in the
6142     list.  Sort the registers into order, and check that the memory
6143     offsets are ascending and adjacent.  */
6144
6145  for (i = 1; i < nops; i++)
6146    {
6147      int j;
6148
6149      order[i] = order[i - 1];
6150      for (j = 0; j < nops; j++)
6151	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6152	    && (order[i] == order[i - 1]
6153		|| unsorted_regs[j] < unsorted_regs[order[i]]))
6154	  order[i] = j;
6155
      /* Have we found a suitable register?  If not, one must be used more
	 than once.  */
6158      if (order[i] == order[i - 1])
6159	return 0;
6160
6161      /* Is the memory address adjacent and ascending? */
6162      if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6163	return 0;
6164    }
6165
6166  if (base)
6167    {
6168      *base = base_reg;
6169
6170      for (i = 0; i < nops; i++)
6171	regs[i] = unsorted_regs[order[i]];
6172
6173      *load_offset = unsorted_offsets[order[0]];
6174    }
6175
6176  if (unsorted_offsets[order[0]] == 0)
6177    return 1; /* stmia */
6178
6179  if (unsorted_offsets[order[0]] == 4)
6180    return 2; /* stmib */
6181
6182  if (unsorted_offsets[order[nops - 1]] == 0)
6183    return 3; /* stmda */
6184
6185  if (unsorted_offsets[order[nops - 1]] == -4)
6186    return 4; /* stmdb */
6187
6188  return 0;
6189}
6190
6191const char *
6192emit_stm_seq (rtx *operands, int nops)
6193{
6194  int regs[4];
6195  int base_reg;
6196  HOST_WIDE_INT offset;
6197  char buf[100];
6198  int i;
6199
6200  switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
6201    {
6202    case 1:
6203      strcpy (buf, "stm%?ia\t");
6204      break;
6205
6206    case 2:
6207      strcpy (buf, "stm%?ib\t");
6208      break;
6209
6210    case 3:
6211      strcpy (buf, "stm%?da\t");
6212      break;
6213
6214    case 4:
6215      strcpy (buf, "stm%?db\t");
6216      break;
6217
6218    default:
6219      gcc_unreachable ();
6220    }
6221
6222  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
6223	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
6224
6225  for (i = 1; i < nops; i++)
6226    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
6227	     reg_names[regs[i]]);
6228
6229  strcat (buf, "}\t%@ phole stm");
6230
6231  output_asm_insn (buf, operands);
6232  return "";
6233}
6234
6235/* Routines for use in generating RTL.  */
6236
6237rtx
6238arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
6239		       int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
6240{
6241  HOST_WIDE_INT offset = *offsetp;
6242  int i = 0, j;
6243  rtx result;
6244  int sign = up ? 1 : -1;
6245  rtx mem, addr;
6246
6247  /* XScale has load-store double instructions, but they have stricter
6248     alignment requirements than load-store multiple, so we cannot
6249     use them.
6250
6251     For XScale ldm requires 2 + NREGS cycles to complete and blocks
6252     the pipeline until completion.
6253
6254	NREGS		CYCLES
6255	  1		  3
6256	  2		  4
6257	  3		  5
6258	  4		  6
6259
6260     An ldr instruction takes 1-3 cycles, but does not block the
6261     pipeline.
6262
6263	NREGS		CYCLES
6264	  1		 1-3
6265	  2		 2-6
6266	  3		 3-9
6267	  4		 4-12
6268
6269     Best case ldr will always win.  However, the more ldr instructions
6270     we issue, the less likely we are to be able to schedule them well.
6271     Using ldr instructions also increases code size.
6272
6273     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
6274     for counts of 3 or 4 regs.  */
6275  if (arm_tune_xscale && count <= 2 && ! optimize_size)
6276    {
6277      rtx seq;
6278
6279      start_sequence ();
6280
6281      for (i = 0; i < count; i++)
6282	{
6283	  addr = plus_constant (from, i * 4 * sign);
6284	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
6285	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
6286	  offset += 4 * sign;
6287	}
6288
6289      if (write_back)
6290	{
6291	  emit_move_insn (from, plus_constant (from, count * 4 * sign));
6292	  *offsetp = offset;
6293	}
6294
6295      seq = get_insns ();
6296      end_sequence ();
6297
6298      return seq;
6299    }
6300
6301  result = gen_rtx_PARALLEL (VOIDmode,
6302			     rtvec_alloc (count + (write_back ? 1 : 0)));
6303  if (write_back)
6304    {
6305      XVECEXP (result, 0, 0)
6306	= gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
6307      i = 1;
6308      count++;
6309    }
6310
6311  for (j = 0; i < count; i++, j++)
6312    {
6313      addr = plus_constant (from, j * 4 * sign);
6314      mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
6315      XVECEXP (result, 0, i)
6316	= gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
6317      offset += 4 * sign;
6318    }
6319
6320  if (write_back)
6321    *offsetp = offset;
6322
6323  return result;
6324}
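
/* A hypothetical usage sketch (not part of GCC): build the RTL for an
   "ldmia FROM, {r0-r3}" loading four words from BASEMEM without
   updating the base register.  */

static rtx ATTRIBUTE_UNUSED
gen_example_ldm (rtx from, rtx basemem)
{
  HOST_WIDE_INT offset = 0;

  /* Base register number 0, four registers, ascending addresses,
     no write-back.  */
  return arm_gen_load_multiple (0, 4, from, TRUE, FALSE, basemem, &offset);
}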
6325
6326rtx
6327arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
6328			int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
6329{
6330  HOST_WIDE_INT offset = *offsetp;
6331  int i = 0, j;
6332  rtx result;
6333  int sign = up ? 1 : -1;
6334  rtx mem, addr;
6335
6336  /* See arm_gen_load_multiple for discussion of
6337     the pros/cons of ldm/stm usage for XScale.  */
6338  if (arm_tune_xscale && count <= 2 && ! optimize_size)
6339    {
6340      rtx seq;
6341
6342      start_sequence ();
6343
6344      for (i = 0; i < count; i++)
6345	{
6346	  addr = plus_constant (to, i * 4 * sign);
6347	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
6348	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
6349	  offset += 4 * sign;
6350	}
6351
6352      if (write_back)
6353	{
6354	  emit_move_insn (to, plus_constant (to, count * 4 * sign));
6355	  *offsetp = offset;
6356	}
6357
6358      seq = get_insns ();
6359      end_sequence ();
6360
6361      return seq;
6362    }
6363
6364  result = gen_rtx_PARALLEL (VOIDmode,
6365			     rtvec_alloc (count + (write_back ? 1 : 0)));
6366  if (write_back)
6367    {
6368      XVECEXP (result, 0, 0)
6369	= gen_rtx_SET (VOIDmode, to,
6370		       plus_constant (to, count * 4 * sign));
6371      i = 1;
6372      count++;
6373    }
6374
6375  for (j = 0; i < count; i++, j++)
6376    {
6377      addr = plus_constant (to, j * 4 * sign);
6378      mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
6379      XVECEXP (result, 0, i)
6380	= gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
6381      offset += 4 * sign;
6382    }
6383
6384  if (write_back)
6385    *offsetp = offset;
6386
6387  return result;
6388}
6389
6390int
6391arm_gen_movmemqi (rtx *operands)
6392{
6393  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
6394  HOST_WIDE_INT srcoffset, dstoffset;
6395  int i;
6396  rtx src, dst, srcbase, dstbase;
6397  rtx part_bytes_reg = NULL;
6398  rtx mem;
6399
6400  if (GET_CODE (operands[2]) != CONST_INT
6401      || GET_CODE (operands[3]) != CONST_INT
6402      || INTVAL (operands[2]) > 64
6403      || INTVAL (operands[3]) & 3)
6404    return 0;
6405
6406  dstbase = operands[0];
6407  srcbase = operands[1];
6408
6409  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
6410  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
6411
6412  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
6413  out_words_to_go = INTVAL (operands[2]) / 4;
6414  last_bytes = INTVAL (operands[2]) & 3;
6415  dstoffset = srcoffset = 0;
6416
6417  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
6418    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
6419
  for (i = 0; in_words_to_go >= 2; i += 4)
6421    {
6422      if (in_words_to_go > 4)
6423	emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
6424					  srcbase, &srcoffset));
6425      else
6426	emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
6427					  FALSE, srcbase, &srcoffset));
6428
6429      if (out_words_to_go)
6430	{
6431	  if (out_words_to_go > 4)
6432	    emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
6433					       dstbase, &dstoffset));
6434	  else if (out_words_to_go != 1)
6435	    emit_insn (arm_gen_store_multiple (0, out_words_to_go,
6436					       dst, TRUE,
6437					       (last_bytes == 0
6438						? FALSE : TRUE),
6439					       dstbase, &dstoffset));
6440	  else
6441	    {
6442	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
6443	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
6444	      if (last_bytes != 0)
6445		{
6446		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
6447		  dstoffset += 4;
6448		}
6449	    }
6450	}
6451
6452      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
6453      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
6454    }
6455
6456  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
6457  if (out_words_to_go)
6458    {
6459      rtx sreg;
6460
6461      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6462      sreg = copy_to_reg (mem);
6463
6464      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
6465      emit_move_insn (mem, sreg);
6466      in_words_to_go--;
6467
      gcc_assert (!in_words_to_go);	/* Sanity check.  */
6469    }
6470
6471  if (in_words_to_go)
6472    {
6473      gcc_assert (in_words_to_go > 0);
6474
6475      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6476      part_bytes_reg = copy_to_mode_reg (SImode, mem);
6477    }
6478
6479  gcc_assert (!last_bytes || part_bytes_reg);
6480
6481  if (BYTES_BIG_ENDIAN && last_bytes)
6482    {
6483      rtx tmp = gen_reg_rtx (SImode);
6484
6485      /* The bytes we want are in the top end of the word.  */
6486      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6487			      GEN_INT (8 * (4 - last_bytes))));
6488      part_bytes_reg = tmp;
6489
6490      while (last_bytes)
6491	{
6492	  mem = adjust_automodify_address (dstbase, QImode,
6493					   plus_constant (dst, last_bytes - 1),
6494					   dstoffset + last_bytes - 1);
6495	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6496
6497	  if (--last_bytes)
6498	    {
6499	      tmp = gen_reg_rtx (SImode);
6500	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6501	      part_bytes_reg = tmp;
6502	    }
6503	}
6504
6505    }
6506  else
6507    {
6508      if (last_bytes > 1)
6509	{
6510	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6511	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6512	  last_bytes -= 2;
6513	  if (last_bytes)
6514	    {
6515	      rtx tmp = gen_reg_rtx (SImode);
6516	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6517	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6518	      part_bytes_reg = tmp;
6519	      dstoffset += 2;
6520	    }
6521	}
6522
6523      if (last_bytes)
6524	{
6525	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6526	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6527	}
6528    }
6529
6530  return 1;
6531}
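
/* A hypothetical sketch (not part of GCC) of the operand convention
   expected by arm_gen_movmemqi: destination MEM, source MEM, a
   constant byte count of at most 64, and a word-multiple alignment.  */

static int ATTRIBUTE_UNUSED
gen_example_movmem (rtx dst_mem, rtx src_mem)
{
  rtx ops[4];

  ops[0] = dst_mem;		/* Destination.  */
  ops[1] = src_mem;		/* Source.  */
  ops[2] = GEN_INT (10);	/* Ten bytes: two words plus two tail bytes.  */
  ops[3] = GEN_INT (4);		/* Alignment, a multiple of four bytes.  */

  return arm_gen_movmemqi (ops);
}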
6532
6533/* Select a dominance comparison mode if possible for a test of the general
6534   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
6535   COND_OR == DOM_CC_X_AND_Y => (X && Y)
6536   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6537   COND_OR == DOM_CC_X_OR_Y => (X || Y)
6538   In all cases OP will be either EQ or NE, but we don't need to know which
6539   here.  If we are unable to support a dominance comparison we return
6540   CC mode.  This will then fail to match for the RTL expressions that
6541   generate this call.  */
6542enum machine_mode
6543arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6544{
6545  enum rtx_code cond1, cond2;
6546  int swapped = 0;
6547
6548  /* Currently we will probably get the wrong result if the individual
6549     comparisons are not simple.  This also ensures that it is safe to
6550     reverse a comparison if necessary.  */
6551  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6552       != CCmode)
6553      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6554	  != CCmode))
6555    return CCmode;
6556
6557  /* The if_then_else variant of this tests the second condition if the
6558     first passes, but is true if the first fails.  Reverse the first
6559     condition to get a true "inclusive-or" expression.  */
6560  if (cond_or == DOM_CC_NX_OR_Y)
6561    cond1 = reverse_condition (cond1);
6562
6563  /* If the comparisons are not equal, and one doesn't dominate the other,
6564     then we can't do this.  */
6565  if (cond1 != cond2
6566      && !comparison_dominates_p (cond1, cond2)
6567      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6568    return CCmode;
6569
6570  if (swapped)
6571    {
6572      enum rtx_code temp = cond1;
6573      cond1 = cond2;
6574      cond2 = temp;
6575    }
6576
6577  switch (cond1)
6578    {
6579    case EQ:
6580      if (cond_or == DOM_CC_X_AND_Y)
6581	return CC_DEQmode;
6582
6583      switch (cond2)
6584	{
6585	case EQ: return CC_DEQmode;
6586	case LE: return CC_DLEmode;
6587	case LEU: return CC_DLEUmode;
6588	case GE: return CC_DGEmode;
6589	case GEU: return CC_DGEUmode;
6590	default: gcc_unreachable ();
6591	}
6592
6593    case LT:
6594      if (cond_or == DOM_CC_X_AND_Y)
6595	return CC_DLTmode;
6596
6597      switch (cond2)
6598	{
	case LT:
	  return CC_DLTmode;
6601	case LE:
6602	  return CC_DLEmode;
6603	case NE:
6604	  return CC_DNEmode;
6605	default:
6606	  gcc_unreachable ();
6607	}
6608
6609    case GT:
6610      if (cond_or == DOM_CC_X_AND_Y)
6611	return CC_DGTmode;
6612
6613      switch (cond2)
6614	{
6615	case GT:
6616	  return CC_DGTmode;
6617	case GE:
6618	  return CC_DGEmode;
6619	case NE:
6620	  return CC_DNEmode;
6621	default:
6622	  gcc_unreachable ();
6623	}
6624
6625    case LTU:
6626      if (cond_or == DOM_CC_X_AND_Y)
6627	return CC_DLTUmode;
6628
6629      switch (cond2)
6630	{
6631	case LTU:
6632	  return CC_DLTUmode;
6633	case LEU:
6634	  return CC_DLEUmode;
6635	case NE:
6636	  return CC_DNEmode;
6637	default:
6638	  gcc_unreachable ();
6639	}
6640
6641    case GTU:
6642      if (cond_or == DOM_CC_X_AND_Y)
6643	return CC_DGTUmode;
6644
6645      switch (cond2)
6646	{
6647	case GTU:
6648	  return CC_DGTUmode;
6649	case GEU:
6650	  return CC_DGEUmode;
6651	case NE:
6652	  return CC_DNEmode;
6653	default:
6654	  gcc_unreachable ();
6655	}
6656
6657    /* The remaining cases only occur when both comparisons are the
6658       same.  */
6659    case NE:
6660      gcc_assert (cond1 == cond2);
6661      return CC_DNEmode;
6662
6663    case LE:
6664      gcc_assert (cond1 == cond2);
6665      return CC_DLEmode;
6666
6667    case GE:
6668      gcc_assert (cond1 == cond2);
6669      return CC_DGEmode;
6670
6671    case LEU:
6672      gcc_assert (cond1 == cond2);
6673      return CC_DLEUmode;
6674
6675    case GEU:
6676      gcc_assert (cond1 == cond2);
6677      return CC_DGEUmode;
6678
6679    default:
6680      gcc_unreachable ();
6681    }
6682}
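
/* An illustrative check (hypothetical, not part of GCC): for a test of
   the form (a == 0 && b == 0) both sub-comparisons are EQ, so the
   dominance machinery above selects CC_DEQmode.  */

static void ATTRIBUTE_UNUSED
dominance_cc_example (void)
{
  rtx x = gen_rtx_EQ (SImode, gen_rtx_REG (SImode, 0), const0_rtx);
  rtx y = gen_rtx_EQ (SImode, gen_rtx_REG (SImode, 1), const0_rtx);

  gcc_assert (arm_select_dominance_cc_mode (x, y, DOM_CC_X_AND_Y)
	      == CC_DEQmode);
}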
6683
6684enum machine_mode
6685arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6686{
6687  /* All floating point compares return CCFP if it is an equality
6688     comparison, and CCFPE otherwise.  */
6689  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6690    {
6691      switch (op)
6692	{
6693	case EQ:
6694	case NE:
6695	case UNORDERED:
6696	case ORDERED:
6697	case UNLT:
6698	case UNLE:
6699	case UNGT:
6700	case UNGE:
6701	case UNEQ:
6702	case LTGT:
6703	  return CCFPmode;
6704
6705	case LT:
6706	case LE:
6707	case GT:
6708	case GE:
6709	  if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6710	    return CCFPmode;
6711	  return CCFPEmode;
6712
6713	default:
6714	  gcc_unreachable ();
6715	}
6716    }
6717
6718  /* A compare with a shifted operand.  Because of canonicalization, the
6719     comparison will have to be swapped when we emit the assembler.  */
6720  if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6721      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6722	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6723	  || GET_CODE (x) == ROTATERT))
6724    return CC_SWPmode;
6725
6726  /* This operation is performed swapped, but since we only rely on the Z
6727     flag we don't need an additional mode.  */
6728  if (GET_MODE (y) == SImode && REG_P (y)
6729      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
6731    return CC_Zmode;
6732
6733  /* This is a special case that is used by combine to allow a
6734     comparison of a shifted byte load to be split into a zero-extend
6735     followed by a comparison of the shifted integer (only valid for
6736     equalities and unsigned inequalities).  */
6737  if (GET_MODE (x) == SImode
6738      && GET_CODE (x) == ASHIFT
6739      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6740      && GET_CODE (XEXP (x, 0)) == SUBREG
6741      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6742      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6743      && (op == EQ || op == NE
6744	  || op == GEU || op == GTU || op == LTU || op == LEU)
6745      && GET_CODE (y) == CONST_INT)
6746    return CC_Zmode;
6747
6748  /* A construct for a conditional compare, if the false arm contains
6749     0, then both conditions must be true, otherwise either condition
6750     must be true.  Not all conditions are possible, so CCmode is
6751     returned if it can't be done.  */
6752  if (GET_CODE (x) == IF_THEN_ELSE
6753      && (XEXP (x, 2) == const0_rtx
6754	  || XEXP (x, 2) == const1_rtx)
6755      && COMPARISON_P (XEXP (x, 0))
6756      && COMPARISON_P (XEXP (x, 1)))
6757    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6758					 INTVAL (XEXP (x, 2)));
6759
6760  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
6761  if (GET_CODE (x) == AND
6762      && COMPARISON_P (XEXP (x, 0))
6763      && COMPARISON_P (XEXP (x, 1)))
6764    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6765					 DOM_CC_X_AND_Y);
6766
6767  if (GET_CODE (x) == IOR
6768      && COMPARISON_P (XEXP (x, 0))
6769      && COMPARISON_P (XEXP (x, 1)))
6770    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6771					 DOM_CC_X_OR_Y);
6772
6773  /* An operation (on Thumb) where we want to test for a single bit.
6774     This is done by shifting that bit up into the top bit of a
6775     scratch register; we can then branch on the sign bit.  */
6776  if (TARGET_THUMB
6777      && GET_MODE (x) == SImode
6778      && (op == EQ || op == NE)
6779      && GET_CODE (x) == ZERO_EXTRACT
6780      && XEXP (x, 1) == const1_rtx)
6781    return CC_Nmode;
6782
6783  /* An operation that sets the condition codes as a side-effect, the
6784     V flag is not set correctly, so we can only use comparisons where
6785     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
6786     instead.)  */
6787  if (GET_MODE (x) == SImode
6788      && y == const0_rtx
6789      && (op == EQ || op == NE || op == LT || op == GE)
6790      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6791	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
6792	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6793	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6794	  || GET_CODE (x) == LSHIFTRT
6795	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6796	  || GET_CODE (x) == ROTATERT
6797	  || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6798    return CC_NOOVmode;
6799
6800  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6801    return CC_Zmode;
6802
6803  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6804      && GET_CODE (x) == PLUS
6805      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6806    return CC_Cmode;
6807
6808  return CCmode;
6809}
6810
/* X and Y are two things to compare using CODE.  Emit the compare insn
   and return the rtx for the CC register in the proper mode.  */
6814rtx
6815arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6816{
6817  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6818  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6819
6820  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
6821
6822  return cc_reg;
6823}
6824
/* Generate a sequence of insns that computes the correct return
   address mask for the physical architecture that the program is
   running on.  */
6828rtx
6829arm_gen_return_addr_mask (void)
6830{
6831  rtx reg = gen_reg_rtx (Pmode);
6832
6833  emit_insn (gen_return_addr_mask (reg));
6834  return reg;
6835}
6836
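/* Handle loading a half-word from memory during reload by synthesizing
   it as two zero-extended byte loads merged with a shift and an IOR.
   OPERANDS[2] is a DImode scratch; one of its words may overlap the
   destination, so use whichever word does not.  */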
6837void
6838arm_reload_in_hi (rtx *operands)
6839{
6840  rtx ref = operands[1];
6841  rtx base, scratch;
6842  HOST_WIDE_INT offset = 0;
6843
6844  if (GET_CODE (ref) == SUBREG)
6845    {
6846      offset = SUBREG_BYTE (ref);
6847      ref = SUBREG_REG (ref);
6848    }
6849
6850  if (GET_CODE (ref) == REG)
6851    {
6852      /* We have a pseudo which has been spilt onto the stack; there
6853	 are two cases here: the first where there is a simple
6854	 stack-slot replacement and a second where the stack-slot is
6855	 out of range, or is used as a subreg.  */
6856      if (reg_equiv_mem[REGNO (ref)])
6857	{
6858	  ref = reg_equiv_mem[REGNO (ref)];
6859	  base = find_replacement (&XEXP (ref, 0));
6860	}
6861      else
6862	/* The slot is out of range, or was dressed up in a SUBREG.  */
6863	base = reg_equiv_address[REGNO (ref)];
6864    }
6865  else
6866    base = find_replacement (&XEXP (ref, 0));
6867
6868  /* Handle the case where the address is too complex to be offset by 1.  */
6869  if (GET_CODE (base) == MINUS
6870      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6871    {
6872      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6873
6874      emit_set_insn (base_plus, base);
6875      base = base_plus;
6876    }
6877  else if (GET_CODE (base) == PLUS)
6878    {
6879      /* The addend must be CONST_INT, or we would have dealt with it above.  */
6880      HOST_WIDE_INT hi, lo;
6881
6882      offset += INTVAL (XEXP (base, 1));
6883      base = XEXP (base, 0);
6884
6885      /* Rework the address into a legal sequence of insns.  */
6886      /* Valid range for lo is -4095 -> 4095 */
6887      lo = (offset >= 0
6888	    ? (offset & 0xfff)
6889	    : -((-offset) & 0xfff));
6890
      /* Corner case: if lo is the max offset, then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
6894      if (lo == 4095)
6895	lo &= 0x7ff;
6896
6897      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6898	     ^ (HOST_WIDE_INT) 0x80000000)
6899	    - (HOST_WIDE_INT) 0x80000000);
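      /* For example, offset 0x1234 splits into lo = 0x234 and hi = 0x1000,
	 while offset -3 splits into lo = -3 and hi = 0.  */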
6900
6901      gcc_assert (hi + lo == offset);
6902
6903      if (hi != 0)
6904	{
6905	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6906
6907	  /* Get the base address; addsi3 knows how to handle constants
6908	     that require more than one insn.  */
6909	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6910	  base = base_plus;
6911	  offset = lo;
6912	}
6913    }
6914
  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]); that's why we asked for a DImode reg -- so we can
     use whichever half does not overlap.  */
6918  if (REGNO (operands[2]) == REGNO (operands[0]))
6919    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6920  else
6921    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6922
6923  emit_insn (gen_zero_extendqisi2 (scratch,
6924				   gen_rtx_MEM (QImode,
6925						plus_constant (base,
6926							       offset))));
6927  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6928				   gen_rtx_MEM (QImode,
6929						plus_constant (base,
6930							       offset + 1))));
6931  if (!BYTES_BIG_ENDIAN)
6932    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
6933		   gen_rtx_IOR (SImode,
6934				gen_rtx_ASHIFT
6935				(SImode,
6936				 gen_rtx_SUBREG (SImode, operands[0], 0),
6937				 GEN_INT (8)),
6938				scratch));
6939  else
6940    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
6941		   gen_rtx_IOR (SImode,
6942				gen_rtx_ASHIFT (SImode, scratch,
6943						GEN_INT (8)),
6944				gen_rtx_SUBREG (SImode, operands[0], 0)));
6945}
6946
/* Handle storing a half-word to memory during reload by synthesizing
   it as two byte stores.  Take care not to clobber the input values
   until after we have moved them somewhere safe.  This code assumes
   that if the DImode scratch in operands[2] overlaps either the input
   value or the output address in some way, then that value must die in
   this insn (we absolutely need two scratch registers for some corner
   cases).  */
6953void
6954arm_reload_out_hi (rtx *operands)
6955{
6956  rtx ref = operands[0];
6957  rtx outval = operands[1];
6958  rtx base, scratch;
6959  HOST_WIDE_INT offset = 0;
6960
6961  if (GET_CODE (ref) == SUBREG)
6962    {
6963      offset = SUBREG_BYTE (ref);
6964      ref = SUBREG_REG (ref);
6965    }
6966
6967  if (GET_CODE (ref) == REG)
6968    {
6969      /* We have a pseudo which has been spilt onto the stack; there
6970	 are two cases here: the first where there is a simple
6971	 stack-slot replacement and a second where the stack-slot is
6972	 out of range, or is used as a subreg.  */
6973      if (reg_equiv_mem[REGNO (ref)])
6974	{
6975	  ref = reg_equiv_mem[REGNO (ref)];
6976	  base = find_replacement (&XEXP (ref, 0));
6977	}
6978      else
6979	/* The slot is out of range, or was dressed up in a SUBREG.  */
6980	base = reg_equiv_address[REGNO (ref)];
6981    }
6982  else
6983    base = find_replacement (&XEXP (ref, 0));
6984
6985  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6986
6987  /* Handle the case where the address is too complex to be offset by 1.  */
6988  if (GET_CODE (base) == MINUS
6989      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6990    {
6991      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6992
6993      /* Be careful not to destroy OUTVAL.  */
6994      if (reg_overlap_mentioned_p (base_plus, outval))
6995	{
	  /* Updating base_plus might destroy outval; see if we can
	     swap the scratch and base_plus.  */
6998	  if (!reg_overlap_mentioned_p (scratch, outval))
6999	    {
7000	      rtx tmp = scratch;
7001	      scratch = base_plus;
7002	      base_plus = tmp;
7003	    }
7004	  else
7005	    {
7006	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
7007
	      /* Be conservative and copy OUTVAL into the scratch now;
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
7011	      /* XXX Might this clobber base?  I can't see how it can,
7012		 since scratch is known to overlap with OUTVAL, and
7013		 must be wider than a word.  */
7014	      emit_insn (gen_movhi (scratch_hi, outval));
7015	      outval = scratch_hi;
7016	    }
7017	}
7018
7019      emit_set_insn (base_plus, base);
7020      base = base_plus;
7021    }
7022  else if (GET_CODE (base) == PLUS)
7023    {
7024      /* The addend must be CONST_INT, or we would have dealt with it above.  */
7025      HOST_WIDE_INT hi, lo;
7026
7027      offset += INTVAL (XEXP (base, 1));
7028      base = XEXP (base, 0);
7029
7030      /* Rework the address into a legal sequence of insns.  */
7031      /* Valid range for lo is -4095 -> 4095 */
7032      lo = (offset >= 0
7033	    ? (offset & 0xfff)
7034	    : -((-offset) & 0xfff));
7035
      /* Corner case: if lo is the max offset, then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
7039      if (lo == 4095)
7040	lo &= 0x7ff;
7041
7042      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7043	     ^ (HOST_WIDE_INT) 0x80000000)
7044	    - (HOST_WIDE_INT) 0x80000000);
7045
7046      gcc_assert (hi + lo == offset);
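
      /* Illustrative examples: for offset = 4095 the corner case above
	 applies, giving lo = 2047 and hi = 2048; for offset = -4100 we
	 get lo = -4 and hi = -4096.  The XOR/subtract pair simply
	 sign-extends the low 32 bits of (offset - lo) into a
	 HOST_WIDE_INT.  */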
7047
7048      if (hi != 0)
7049	{
7050	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7051
7052	  /* Be careful not to destroy OUTVAL.  */
7053	  if (reg_overlap_mentioned_p (base_plus, outval))
7054	    {
7055	      /* Updating base_plus might destroy outval, see if we
7056		 can swap the scratch and base_plus.  */
7057	      if (!reg_overlap_mentioned_p (scratch, outval))
7058		{
7059		  rtx tmp = scratch;
7060		  scratch = base_plus;
7061		  base_plus = tmp;
7062		}
7063	      else
7064		{
7065		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
7066
7067		  /* Be conservative and copy outval into scratch now,
7068		     this should only be necessary if outval is a
7069		     subreg of something larger than a word.  */
7070		  /* XXX Might this clobber base?  I can't see how it
7071		     can, since scratch is known to overlap with
7072		     outval.  */
7073		  emit_insn (gen_movhi (scratch_hi, outval));
7074		  outval = scratch_hi;
7075		}
7076	    }
7077
7078	  /* Get the base address; addsi3 knows how to handle constants
7079	     that require more than one insn.  */
7080	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7081	  base = base_plus;
7082	  offset = lo;
7083	}
7084    }
7085
7086  if (BYTES_BIG_ENDIAN)
7087    {
7088      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
7089					 plus_constant (base, offset + 1)),
7090			    gen_lowpart (QImode, outval)));
7091      emit_insn (gen_lshrsi3 (scratch,
7092			      gen_rtx_SUBREG (SImode, outval, 0),
7093			      GEN_INT (8)));
7094      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
7095			    gen_lowpart (QImode, scratch)));
7096    }
7097  else
7098    {
7099      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
7100			    gen_lowpart (QImode, outval)));
7101      emit_insn (gen_lshrsi3 (scratch,
7102			      gen_rtx_SUBREG (SImode, outval, 0),
7103			      GEN_INT (8)));
7104      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
7105					 plus_constant (base, offset + 1)),
7106			    gen_lowpart (QImode, scratch)));
7107    }
7108}
7109
/* Return true if a type must be passed in memory.  For AAPCS, small
   aggregates (padded to the size of a word) should be passed in a
   register.  */
7112
7113static bool
7114arm_must_pass_in_stack (enum machine_mode mode, tree type)
7115{
7116  if (TARGET_AAPCS_BASED)
7117    return must_pass_in_stack_var_size (mode, type);
7118  else
7119    return must_pass_in_stack_var_size_or_pad (mode, type);
7120}
7121
7122
7123/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
7124   Return true if an argument passed on the stack should be padded upwards,
7125   i.e. if the least-significant byte has useful data.
7126   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
7127   aggregate types are placed in the lowest memory address.  */
7128
7129bool
7130arm_pad_arg_upward (enum machine_mode mode, tree type)
7131{
7132  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
7134
7135  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
7136    return false;
7137
7138  return true;
7139}
7140
7141
7142/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
7143   For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
7144   byte of the register has useful data, and return the opposite if the
7145   most significant byte does.
7146   For AAPCS, small aggregates and small complex types are always padded
7147   upwards.  */
7148
7149bool
7150arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
7151                    tree type, int first ATTRIBUTE_UNUSED)
7152{
7153  if (TARGET_AAPCS_BASED
7154      && BYTES_BIG_ENDIAN
7155      && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
7156      && int_size_in_bytes (type) <= 4)
7157    return true;
7158
7159  /* Otherwise, use default padding.  */
7160  return !BYTES_BIG_ENDIAN;
7161}
7162
7163
7164/* Print a symbolic form of X to the debug file, F.  */
7165static void
7166arm_print_value (FILE *f, rtx x)
7167{
7168  switch (GET_CODE (x))
7169    {
7170    case CONST_INT:
7171      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
7172      return;
7173
7174    case CONST_DOUBLE:
7175      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
7176      return;
7177
7178    case CONST_VECTOR:
7179      {
7180	int i;
7181
7182	fprintf (f, "<");
7183	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
7184	  {
7185	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
7186	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
7187	      fputc (',', f);
7188	  }
7189	fprintf (f, ">");
7190      }
7191      return;
7192
7193    case CONST_STRING:
7194      fprintf (f, "\"%s\"", XSTR (x, 0));
7195      return;
7196
7197    case SYMBOL_REF:
7198      fprintf (f, "`%s'", XSTR (x, 0));
7199      return;
7200
7201    case LABEL_REF:
7202      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
7203      return;
7204
7205    case CONST:
7206      arm_print_value (f, XEXP (x, 0));
7207      return;
7208
7209    case PLUS:
7210      arm_print_value (f, XEXP (x, 0));
7211      fprintf (f, "+");
7212      arm_print_value (f, XEXP (x, 1));
7213      return;
7214
7215    case PC:
7216      fprintf (f, "pc");
7217      return;
7218
7219    default:
7220      fprintf (f, "????");
7221      return;
7222    }
7223}
7224
7225/* Routines for manipulation of the constant pool.  */
7226
7227/* Arm instructions cannot load a large constant directly into a
7228   register; they have to come from a pc relative load.  The constant
7229   must therefore be placed in the addressable range of the pc
7230   relative load.  Depending on the precise pc relative load
7231   instruction the range is somewhere between 256 bytes and 4k.  This
7232   means that we often have to dump a constant inside a function, and
7233   generate code to branch around it.
7234
7235   It is important to minimize this, since the branches will slow
7236   things down and make the code larger.
7237
7238   Normally we can hide the table after an existing unconditional
7239   branch so that there is no interruption of the flow, but in the
7240   worst case the code looks like this:
7241
7242	ldr	rn, L1
7243	...
7244	b	L2
7245	align
7246	L1:	.long value
7247	L2:
7248	...
7249
7250	ldr	rn, L3
7251	...
7252	b	L4
7253	align
7254	L3:	.long value
7255	L4:
7256	...
7257
7258   We fix this by performing a scan after scheduling, which notices
7259   which instructions need to have their operands fetched from the
7260   constant table and builds the table.
7261
7262   The algorithm starts by building a table of all the constants that
7263   need fixing up and all the natural barriers in the function (places
7264   where a constant table can be dropped without breaking the flow).
7265   For each fixup we note how far the pc-relative replacement will be
7266   able to reach and the offset of the instruction into the function.
7267
7268   Having built the table we then group the fixes together to form
7269   tables that are as large as possible (subject to addressing
7270   constraints) and emit each table of constants after the last
7271   barrier that is within range of all the instructions in the group.
7272   If a group does not contain a barrier, then we forcibly create one
7273   by inserting a jump instruction into the flow.  Once the table has
7274   been inserted, the insns are then modified to reference the
7275   relevant entry in the pool.
7276
7277   Possible enhancements to the algorithm (not implemented) are:
7278
7279   1) For some processors and object formats, there may be benefit in
7280   aligning the pools to the start of cache lines; this alignment
7281   would need to be taken into account when calculating addressability
7282   of a pool.  */
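
/* A small worked illustration (the numbers are invented): suppose two
   loads at addresses 100 and 200 each have a 1000-byte forward range.
   Their pool entries must then be placed before roughly addresses 1100
   and 1200 respectively, so a shared pool must be emitted after the
   last natural barrier below address 1100; if no such barrier exists,
   a branch-around barrier is created as described above.  */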
7283
7284/* These typedefs are located at the start of this file, so that
7285   they can be used in the prototypes there.  This comment is to
7286   remind readers of that fact so that the following structures
7287   can be understood more easily.
7288
7289     typedef struct minipool_node    Mnode;
7290     typedef struct minipool_fixup   Mfix;  */
7291
7292struct minipool_node
7293{
7294  /* Doubly linked chain of entries.  */
7295  Mnode * next;
7296  Mnode * prev;
  /* The maximum offset into the code at which this entry can be placed.
     While pushing fixes for forward references, all entries are sorted in
     order of increasing max_address.  */
7300  HOST_WIDE_INT max_address;
7301  /* Similarly for an entry inserted for a backwards ref.  */
7302  HOST_WIDE_INT min_address;
7303  /* The number of fixes referencing this entry.  This can become zero
7304     if we "unpush" an entry.  In this case we ignore the entry when we
7305     come to emit the code.  */
7306  int refcount;
7307  /* The offset from the start of the minipool.  */
7308  HOST_WIDE_INT offset;
  /* The value in the table.  */
7310  rtx value;
7311  /* The mode of value.  */
7312  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled,
     sizes > 4 also imply an alignment of 8 bytes.  */
7315  int fix_size;
7316};
7317
7318struct minipool_fixup
7319{
7320  Mfix *            next;
7321  rtx               insn;
7322  HOST_WIDE_INT     address;
7323  rtx *             loc;
7324  enum machine_mode mode;
7325  int               fix_size;
7326  rtx               value;
7327  Mnode *           minipool;
7328  HOST_WIDE_INT     forwards;
7329  HOST_WIDE_INT     backwards;
7330};
7331
7332/* Fixes less than a word need padding out to a word boundary.  */
7333#define MINIPOOL_FIX_SIZE(mode) \
7334  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
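
/* For example, QImode and HImode entries (1 and 2 bytes) are padded out
   to 4 bytes, while SImode and DImode entries keep their natural sizes
   of 4 and 8 bytes.  */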
7335
7336static Mnode *	minipool_vector_head;
7337static Mnode *	minipool_vector_tail;
7338static rtx	minipool_vector_label;
7339static int	minipool_pad;
7340
7341/* The linked list of all minipool fixes required for this function.  */
7342Mfix * 		minipool_fix_head;
7343Mfix * 		minipool_fix_tail;
7344/* The fix entry for the current minipool, once it has been placed.  */
7345Mfix *		minipool_barrier;
7346
7347/* Determines if INSN is the start of a jump table.  Returns the end
7348   of the TABLE or NULL_RTX.  */
7349static rtx
7350is_jump_table (rtx insn)
7351{
7352  rtx table;
7353
7354  if (GET_CODE (insn) == JUMP_INSN
7355      && JUMP_LABEL (insn) != NULL
7356      && ((table = next_real_insn (JUMP_LABEL (insn)))
7357	  == next_real_insn (insn))
7358      && table != NULL
7359      && GET_CODE (table) == JUMP_INSN
7360      && (GET_CODE (PATTERN (table)) == ADDR_VEC
7361	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
7362    return table;
7363
7364  return NULL_RTX;
7365}
7366
7367#ifndef JUMP_TABLES_IN_TEXT_SECTION
7368#define JUMP_TABLES_IN_TEXT_SECTION 0
7369#endif
7370
7371static HOST_WIDE_INT
7372get_jump_table_size (rtx insn)
7373{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
7376  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
7377    {
7378      rtx body = PATTERN (insn);
7379      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
7380
7381      return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
7382    }
7383
7384  return 0;
7385}
7386
7387/* Move a minipool fix MP from its current location to before MAX_MP.
7388   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
7389   constraints may need updating.  */
7390static Mnode *
7391move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
7392			       HOST_WIDE_INT max_address)
7393{
7394  /* The code below assumes these are different.  */
7395  gcc_assert (mp != max_mp);
7396
7397  if (max_mp == NULL)
7398    {
7399      if (max_address < mp->max_address)
7400	mp->max_address = max_address;
7401    }
7402  else
7403    {
7404      if (max_address > max_mp->max_address - mp->fix_size)
7405	mp->max_address = max_mp->max_address - mp->fix_size;
7406      else
7407	mp->max_address = max_address;
7408
      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
7411      mp->prev->next = mp->next;
7412      if (mp->next != NULL)
7413	mp->next->prev = mp->prev;
7414      else
7415	minipool_vector_tail = mp->prev;
7416
7417      /* Re-insert it before MAX_MP.  */
7418      mp->next = max_mp;
7419      mp->prev = max_mp->prev;
7420      max_mp->prev = mp;
7421
7422      if (mp->prev != NULL)
7423	mp->prev->next = mp;
7424      else
7425	minipool_vector_head = mp;
7426    }
7427
7428  /* Save the new entry.  */
7429  max_mp = mp;
7430
7431  /* Scan over the preceding entries and adjust their addresses as
7432     required.  */
7433  while (mp->prev != NULL
7434	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7435    {
7436      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7437      mp = mp->prev;
7438    }
7439
7440  return max_mp;
7441}
7442
7443/* Add a constant to the minipool for a forward reference.  Returns the
7444   node added or NULL if the constant will not fit in this pool.  */
7445static Mnode *
7446add_minipool_forward_ref (Mfix *fix)
7447{
7448  /* If set, max_mp is the first pool_entry that has a lower
7449     constraint than the one we are trying to add.  */
7450  Mnode *       max_mp = NULL;
7451  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
7452  Mnode *       mp;
7453
  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
	  >= minipool_vector_head->max_address - fix->fix_size))
7461    return NULL;
7462
7463  /* Scan the pool to see if a constant with the same value has
7464     already been added.  While we are doing this, also note the
7465     location where we must insert the constant if it doesn't already
7466     exist.  */
7467  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7468    {
7469      if (GET_CODE (fix->value) == GET_CODE (mp->value)
7470	  && fix->mode == mp->mode
7471	  && (GET_CODE (fix->value) != CODE_LABEL
7472	      || (CODE_LABEL_NUMBER (fix->value)
7473		  == CODE_LABEL_NUMBER (mp->value)))
7474	  && rtx_equal_p (fix->value, mp->value))
7475	{
7476	  /* More than one fix references this entry.  */
7477	  mp->refcount++;
7478	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
7479	}
7480
7481      /* Note the insertion point if necessary.  */
7482      if (max_mp == NULL
7483	  && mp->max_address > max_address)
7484	max_mp = mp;
7485
      /* If we are inserting an 8-byte aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
7490      if (ARM_DOUBLEWORD_ALIGN
7491	  && max_mp == NULL
7492	  && fix->fix_size == 8
7493	  && mp->fix_size != 8)
7494	{
7495	  max_mp = mp;
7496	  max_address = mp->max_address;
7497	}
7498    }
7499
7500  /* The value is not currently in the minipool, so we need to create
7501     a new entry for it.  If MAX_MP is NULL, the entry will be put on
7502     the end of the list since the placement is less constrained than
7503     any existing entry.  Otherwise, we insert the new fix before
7504     MAX_MP and, if necessary, adjust the constraints on the other
7505     entries.  */
7506  mp = XNEW (Mnode);
7507  mp->fix_size = fix->fix_size;
7508  mp->mode = fix->mode;
7509  mp->value = fix->value;
7510  mp->refcount = 1;
7511  /* Not yet required for a backwards ref.  */
7512  mp->min_address = -65536;
7513
7514  if (max_mp == NULL)
7515    {
7516      mp->max_address = max_address;
7517      mp->next = NULL;
7518      mp->prev = minipool_vector_tail;
7519
7520      if (mp->prev == NULL)
7521	{
7522	  minipool_vector_head = mp;
7523	  minipool_vector_label = gen_label_rtx ();
7524	}
7525      else
7526	mp->prev->next = mp;
7527
7528      minipool_vector_tail = mp;
7529    }
7530  else
7531    {
7532      if (max_address > max_mp->max_address - mp->fix_size)
7533	mp->max_address = max_mp->max_address - mp->fix_size;
7534      else
7535	mp->max_address = max_address;
7536
7537      mp->next = max_mp;
7538      mp->prev = max_mp->prev;
7539      max_mp->prev = mp;
7540      if (mp->prev != NULL)
7541	mp->prev->next = mp;
7542      else
7543	minipool_vector_head = mp;
7544    }
7545
7546  /* Save the new entry.  */
7547  max_mp = mp;
7548
7549  /* Scan over the preceding entries and adjust their addresses as
7550     required.  */
7551  while (mp->prev != NULL
7552	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7553    {
7554      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7555      mp = mp->prev;
7556    }
7557
7558  return max_mp;
7559}
7560
7561static Mnode *
7562move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7563				HOST_WIDE_INT  min_address)
7564{
7565  HOST_WIDE_INT offset;
7566
7567  /* The code below assumes these are different.  */
7568  gcc_assert (mp != min_mp);
7569
7570  if (min_mp == NULL)
7571    {
7572      if (min_address > mp->min_address)
7573	mp->min_address = min_address;
7574    }
7575  else
7576    {
7577      /* We will adjust this below if it is too loose.  */
7578      mp->min_address = min_address;
7579
7580      /* Unlink MP from its current position.  Since min_mp is non-null,
7581	 mp->next must be non-null.  */
7582      mp->next->prev = mp->prev;
7583      if (mp->prev != NULL)
7584	mp->prev->next = mp->next;
7585      else
7586	minipool_vector_head = mp->next;
7587
7588      /* Reinsert it after MIN_MP.  */
7589      mp->prev = min_mp;
7590      mp->next = min_mp->next;
7591      min_mp->next = mp;
7592      if (mp->next != NULL)
7593	mp->next->prev = mp;
7594      else
7595	minipool_vector_tail = mp;
7596    }
7597
7598  min_mp = mp;
7599
7600  offset = 0;
7601  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7602    {
7603      mp->offset = offset;
7604      if (mp->refcount > 0)
7605	offset += mp->fix_size;
7606
7607      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7608	mp->next->min_address = mp->min_address + mp->fix_size;
7609    }
7610
7611  return min_mp;
7612}
7613
7614/* Add a constant to the minipool for a backward reference.  Returns the
7615   node added or NULL if the constant will not fit in this pool.
7616
   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
7621static Mnode *
7622add_minipool_backward_ref (Mfix *fix)
7623{
7624  /* If set, min_mp is the last pool_entry that has a lower constraint
7625     than the one we are trying to add.  */
7626  Mnode *min_mp = NULL;
7627  /* This can be negative, since it is only a constraint.  */
7628  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
7629  Mnode *mp;
7630
7631  /* If we can't reach the current pool from this insn, or if we can't
7632     insert this entry at the end of the pool without pushing other
7633     fixes out of range, then we don't try.  This ensures that we
7634     can't fail later on.  */
7635  if (min_address >= minipool_barrier->address
7636      || (minipool_vector_tail->min_address + fix->fix_size
7637	  >= minipool_barrier->address))
7638    return NULL;
7639
7640  /* Scan the pool to see if a constant with the same value has
7641     already been added.  While we are doing this, also note the
7642     location where we must insert the constant if it doesn't already
7643     exist.  */
7644  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7645    {
7646      if (GET_CODE (fix->value) == GET_CODE (mp->value)
7647	  && fix->mode == mp->mode
7648	  && (GET_CODE (fix->value) != CODE_LABEL
7649	      || (CODE_LABEL_NUMBER (fix->value)
7650		  == CODE_LABEL_NUMBER (mp->value)))
7651	  && rtx_equal_p (fix->value, mp->value)
7652	  /* Check that there is enough slack to move this entry to the
7653	     end of the table (this is conservative).  */
7654	  && (mp->max_address
7655	      > (minipool_barrier->address
7656		 + minipool_vector_tail->offset
7657		 + minipool_vector_tail->fix_size)))
7658	{
7659	  mp->refcount++;
7660	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7661	}
7662
7663      if (min_mp != NULL)
7664	mp->min_address += fix->fix_size;
7665      else
7666	{
7667	  /* Note the insertion point if necessary.  */
7668	  if (mp->min_address < min_address)
7669	    {
7670	      /* For now, we do not allow the insertion of 8-byte alignment
7671		 requiring nodes anywhere but at the start of the pool.  */
7672	      if (ARM_DOUBLEWORD_ALIGN
7673		  && fix->fix_size == 8 && mp->fix_size != 8)
7674		return NULL;
7675	      else
7676		min_mp = mp;
7677	    }
7678	  else if (mp->max_address
7679		   < minipool_barrier->address + mp->offset + fix->fix_size)
7680	    {
7681	      /* Inserting before this entry would push the fix beyond
7682		 its maximum address (which can happen if we have
7683		 re-located a forwards fix); force the new fix to come
7684		 after it.  */
7685	      min_mp = mp;
7686	      min_address = mp->min_address + fix->fix_size;
7687	    }
	  /* If we are inserting an 8-byte aligned quantity and
	     we have not already found an insertion point, then
	     make sure that all such 8-byte aligned quantities are
	     placed at the start of the pool.  */
7692	  else if (ARM_DOUBLEWORD_ALIGN
7693		   && min_mp == NULL
7694		   && fix->fix_size == 8
7695		   && mp->fix_size < 8)
7696	    {
7697	      min_mp = mp;
7698	      min_address = mp->min_address + fix->fix_size;
7699	    }
7700	}
7701    }
7702
7703  /* We need to create a new entry.  */
7704  mp = XNEW (Mnode);
7705  mp->fix_size = fix->fix_size;
7706  mp->mode = fix->mode;
7707  mp->value = fix->value;
7708  mp->refcount = 1;
7709  mp->max_address = minipool_barrier->address + 65536;
7710
7711  mp->min_address = min_address;
7712
7713  if (min_mp == NULL)
7714    {
7715      mp->prev = NULL;
7716      mp->next = minipool_vector_head;
7717
7718      if (mp->next == NULL)
7719	{
7720	  minipool_vector_tail = mp;
7721	  minipool_vector_label = gen_label_rtx ();
7722	}
7723      else
7724	mp->next->prev = mp;
7725
7726      minipool_vector_head = mp;
7727    }
7728  else
7729    {
7730      mp->next = min_mp->next;
7731      mp->prev = min_mp;
7732      min_mp->next = mp;
7733
7734      if (mp->next != NULL)
7735	mp->next->prev = mp;
7736      else
7737	minipool_vector_tail = mp;
7738    }
7739
7740  /* Save the new entry.  */
7741  min_mp = mp;
7742
7743  if (mp->prev)
7744    mp = mp->prev;
7745  else
7746    mp->offset = 0;
7747
7748  /* Scan over the following entries and adjust their offsets.  */
7749  while (mp->next != NULL)
7750    {
7751      if (mp->next->min_address < mp->min_address + mp->fix_size)
7752	mp->next->min_address = mp->min_address + mp->fix_size;
7753
7754      if (mp->refcount)
7755	mp->next->offset = mp->offset + mp->fix_size;
7756      else
7757	mp->next->offset = mp->offset;
7758
7759      mp = mp->next;
7760    }
7761
7762  return min_mp;
7763}
7764
7765static void
7766assign_minipool_offsets (Mfix *barrier)
7767{
7768  HOST_WIDE_INT offset = 0;
7769  Mnode *mp;
7770
7771  minipool_barrier = barrier;
7772
7773  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7774    {
7775      mp->offset = offset;
7776
7777      if (mp->refcount > 0)
7778	offset += mp->fix_size;
7779    }
7780}
7781
/* Output the literal table.  */
7783static void
7784dump_minipool (rtx scan)
7785{
7786  Mnode * mp;
7787  Mnode * nmp;
7788  int align64 = 0;
7789
7790  if (ARM_DOUBLEWORD_ALIGN)
7791    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7792      if (mp->refcount > 0 && mp->fix_size == 8)
7793	{
7794	  align64 = 1;
7795	  break;
7796	}
7797
7798  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
7802
7803  scan = emit_label_after (gen_label_rtx (), scan);
7804  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7805  scan = emit_label_after (minipool_vector_label, scan);
7806
7807  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7808    {
7809      if (mp->refcount > 0)
7810	{
7811	  if (dump_file)
7812	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (long) mp->min_address,
		       (long) mp->max_address);
7817	      arm_print_value (dump_file, mp->value);
7818	      fputc ('\n', dump_file);
7819	    }
7820
7821	  switch (mp->fix_size)
7822	    {
7823#ifdef HAVE_consttable_1
7824	    case 1:
7825	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7826	      break;
7827
7828#endif
7829#ifdef HAVE_consttable_2
7830	    case 2:
7831	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7832	      break;
7833
7834#endif
7835#ifdef HAVE_consttable_4
7836	    case 4:
7837	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7838	      break;
7839
7840#endif
7841#ifdef HAVE_consttable_8
7842	    case 8:
7843	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7844	      break;
7845
7846#endif
7847	    default:
7848	      gcc_unreachable ();
7849	    }
7850	}
7851
7852      nmp = mp->next;
7853      free (mp);
7854    }
7855
7856  minipool_vector_head = minipool_vector_tail = NULL;
7857  scan = emit_insn_after (gen_consttable_end (), scan);
7858  scan = emit_barrier_after (scan);
7859}
7860
7861/* Return the cost of forcibly inserting a barrier after INSN.  */
7862static int
7863arm_barrier_cost (rtx insn)
7864{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
7868  int base_cost = 50;
7869  rtx next = next_nonnote_insn (insn);
7870
7871  if (next != NULL && GET_CODE (next) == CODE_LABEL)
7872    base_cost -= 20;
7873
7874  switch (GET_CODE (insn))
7875    {
7876    case CODE_LABEL:
7877      /* It will always be better to place the table before the label, rather
7878	 than after it.  */
7879      return 50;
7880
7881    case INSN:
7882    case CALL_INSN:
7883      return base_cost;
7884
7885    case JUMP_INSN:
7886      return base_cost - 10;
7887
7888    default:
7889      return base_cost + 10;
7890    }
7891}
7892
7893/* Find the best place in the insn stream in the range
7894   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7895   Create the barrier by inserting a jump and add a new fix entry for
7896   it.  */
7897static Mfix *
7898create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7899{
7900  HOST_WIDE_INT count = 0;
7901  rtx barrier;
7902  rtx from = fix->insn;
7903  /* The instruction after which we will insert the jump.  */
7904  rtx selected = NULL;
7905  int selected_cost;
7906  /* The address at which the jump instruction will be placed.  */
7907  HOST_WIDE_INT selected_address;
7908  Mfix * new_fix;
7909  HOST_WIDE_INT max_count = max_address - fix->address;
7910  rtx label = gen_label_rtx ();
7911
7912  selected_cost = arm_barrier_cost (from);
7913  selected_address = fix->address;
7914
7915  while (from && count < max_count)
7916    {
7917      rtx tmp;
7918      int new_cost;
7919
7920      /* This code shouldn't have been called if there was a natural barrier
7921	 within range.  */
7922      gcc_assert (GET_CODE (from) != BARRIER);
7923
7924      /* Count the length of this insn.  */
7925      count += get_attr_length (from);
7926
7927      /* If there is a jump table, add its length.  */
7928      tmp = is_jump_table (from);
7929      if (tmp != NULL)
7930	{
7931	  count += get_jump_table_size (tmp);
7932
7933	  /* Jump tables aren't in a basic block, so base the cost on
7934	     the dispatch insn.  If we select this location, we will
7935	     still put the pool after the table.  */
7936	  new_cost = arm_barrier_cost (from);
7937
7938	  if (count < max_count
7939	      && (!selected || new_cost <= selected_cost))
7940	    {
7941	      selected = tmp;
7942	      selected_cost = new_cost;
7943	      selected_address = fix->address + count;
7944	    }
7945
7946	  /* Continue after the dispatch table.  */
7947	  from = NEXT_INSN (tmp);
7948	  continue;
7949	}
7950
7951      new_cost = arm_barrier_cost (from);
7952
7953      if (count < max_count
7954	  && (!selected || new_cost <= selected_cost))
7955	{
7956	  selected = from;
7957	  selected_cost = new_cost;
7958	  selected_address = fix->address + count;
7959	}
7960
7961      from = NEXT_INSN (from);
7962    }
7963
7964  /* Make sure that we found a place to insert the jump.  */
7965  gcc_assert (selected);
7966
7967  /* Create a new JUMP_INSN that branches around a barrier.  */
7968  from = emit_jump_insn_after (gen_jump (label), selected);
7969  JUMP_LABEL (from) = label;
7970  barrier = emit_barrier_after (from);
7971  emit_label_after (label, barrier);
7972
7973  /* Create a minipool barrier entry for the new barrier.  */
7974  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7975  new_fix->insn = barrier;
7976  new_fix->address = selected_address;
7977  new_fix->next = fix->next;
7978  fix->next = new_fix;
7979
7980  return new_fix;
7981}
7982
7983/* Record that there is a natural barrier in the insn stream at
7984   ADDRESS.  */
7985static void
7986push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7987{
7988  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7989
7990  fix->insn = insn;
7991  fix->address = address;
7992
7993  fix->next = NULL;
7994  if (minipool_fix_head != NULL)
7995    minipool_fix_tail->next = fix;
7996  else
7997    minipool_fix_head = fix;
7998
7999  minipool_fix_tail = fix;
8000}
8001
8002/* Record INSN, which will need fixing up to load a value from the
8003   minipool.  ADDRESS is the offset of the insn since the start of the
8004   function; LOC is a pointer to the part of the insn which requires
8005   fixing; VALUE is the constant that must be loaded, which is of type
8006   MODE.  */
8007static void
8008push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
8009		   enum machine_mode mode, rtx value)
8010{
8011  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
8012
8013#ifdef AOF_ASSEMBLER
8014  /* PIC symbol references need to be converted into offsets into the
8015     based area.  */
8016  /* XXX This shouldn't be done here.  */
8017  if (flag_pic && GET_CODE (value) == SYMBOL_REF)
8018    value = aof_pic_entry (value);
8019#endif /* AOF_ASSEMBLER */
8020
8021  fix->insn = insn;
8022  fix->address = address;
8023  fix->loc = loc;
8024  fix->mode = mode;
8025  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
8026  fix->value = value;
8027  fix->forwards = get_attr_pool_range (insn);
8028  fix->backwards = get_attr_neg_pool_range (insn);
8029  fix->minipool = NULL;
8030
8031  /* If an insn doesn't have a range defined for it, then it isn't
8032     expecting to be reworked by this code.  Better to stop now than
8033     to generate duff assembly code.  */
8034  gcc_assert (fix->forwards || fix->backwards);
8035
8036  /* If an entry requires 8-byte alignment then assume all constant pools
8037     require 4 bytes of padding.  Trying to do this later on a per-pool
8038     basis is awkward because existing pool entries have to be modified.  */
8039  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
8040    minipool_pad = 4;
8041
8042  if (dump_file)
8043    {
8044      fprintf (dump_file,
8045	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
8046	       GET_MODE_NAME (mode),
8047	       INSN_UID (insn), (unsigned long) address,
8048	       -1 * (long)fix->backwards, (long)fix->forwards);
8049      arm_print_value (dump_file, fix->value);
8050      fprintf (dump_file, "\n");
8051    }
8052
8053  /* Add it to the chain of fixes.  */
8054  fix->next = NULL;
8055
8056  if (minipool_fix_head != NULL)
8057    minipool_fix_tail->next = fix;
8058  else
8059    minipool_fix_head = fix;
8060
8061  minipool_fix_tail = fix;
8062}
8063
8064/* Return the cost of synthesizing a 64-bit constant VAL inline.
8065   Returns the number of insns needed, or 99 if we don't know how to
8066   do it.  */
8067int
8068arm_const_double_inline_cost (rtx val)
8069{
8070  rtx lowpart, highpart;
8071  enum machine_mode mode;
8072
8073  mode = GET_MODE (val);
8074
8075  if (mode == VOIDmode)
8076    mode = DImode;
8077
8078  gcc_assert (GET_MODE_SIZE (mode) == 8);
8079
8080  lowpart = gen_lowpart (SImode, val);
8081  highpart = gen_highpart_mode (SImode, mode, val);
8082
8083  gcc_assert (GET_CODE (lowpart) == CONST_INT);
8084  gcc_assert (GET_CODE (highpart) == CONST_INT);
8085
8086  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
8087			    NULL_RTX, NULL_RTX, 0, 0)
8088	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
8089			      NULL_RTX, NULL_RTX, 0, 0));
8090}
8091
8092/* Return true if it is worthwhile to split a 64-bit constant into two
8093   32-bit operations.  This is the case if optimizing for size, or
8094   if we have load delay slots, or if one 32-bit part can be done with
8095   a single data operation.  */
8096bool
8097arm_const_double_by_parts (rtx val)
8098{
8099  enum machine_mode mode = GET_MODE (val);
8100  rtx part;
8101
8102  if (optimize_size || arm_ld_sched)
8103    return true;
8104
8105  if (mode == VOIDmode)
8106    mode = DImode;
8107
8108  part = gen_highpart_mode (SImode, mode, val);
8109
8110  gcc_assert (GET_CODE (part) == CONST_INT);
8111
8112  if (const_ok_for_arm (INTVAL (part))
8113      || const_ok_for_arm (~INTVAL (part)))
8114    return true;
8115
8116  part = gen_lowpart (SImode, val);
8117
8118  gcc_assert (GET_CODE (part) == CONST_INT);
8119
8120  if (const_ok_for_arm (INTVAL (part))
8121      || const_ok_for_arm (~INTVAL (part)))
8122    return true;
8123
8124  return false;
8125}
8126
8127/* Scan INSN and note any of its operands that need fixing.
8128   If DO_PUSHES is false we do not actually push any of the fixups
8129   needed.  The function returns TRUE if any fixups were needed/pushed.
8130   This is used by arm_memory_load_p() which needs to know about loads
8131   of constants that will be converted into minipool loads.  */
8132static bool
8133note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
8134{
8135  bool result = false;
8136  int opno;
8137
8138  extract_insn (insn);
8139
8140  if (!constrain_operands (1))
8141    fatal_insn_not_found (insn);
8142
8143  if (recog_data.n_alternatives == 0)
8144    return false;
8145
8146  /* Fill in recog_op_alt with information about the constraints of
8147     this insn.  */
8148  preprocess_constraints ();
8149
8150  for (opno = 0; opno < recog_data.n_operands; opno++)
8151    {
8152      /* Things we need to fix can only occur in inputs.  */
8153      if (recog_data.operand_type[opno] != OP_IN)
8154	continue;
8155
8156      /* If this alternative is a memory reference, then any mention
8157	 of constants in this alternative is really to fool reload
8158	 into allowing us to accept one there.  We need to fix them up
8159	 now so that we output the right code.  */
8160      if (recog_op_alt[opno][which_alternative].memory_ok)
8161	{
8162	  rtx op = recog_data.operand[opno];
8163
8164	  if (CONSTANT_P (op))
8165	    {
8166	      if (do_pushes)
8167		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
8168				   recog_data.operand_mode[opno], op);
8169	      result = true;
8170	    }
8171	  else if (GET_CODE (op) == MEM
8172		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
8173		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
8174	    {
8175	      if (do_pushes)
8176		{
8177		  rtx cop = avoid_constant_pool_reference (op);
8178
8179		  /* Casting the address of something to a mode narrower
8180		     than a word can cause avoid_constant_pool_reference()
8181		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
8183		     constant pool value directly.  */
8184		  if (op == cop)
8185		    cop = get_pool_constant (XEXP (op, 0));
8186
8187		  push_minipool_fix (insn, address,
8188				     recog_data.operand_loc[opno],
8189				     recog_data.operand_mode[opno], cop);
8190		}
8191
8192	      result = true;
8193	    }
8194	}
8195    }
8196
8197  return result;
8198}
8199
/* GCC puts the pool in the wrong place for ARM, since we can only
   load constants from a limited distance around the pc.  We do some
8202   special munging to move the constant pool values to the correct
8203   point in the code.  */
8204static void
8205arm_reorg (void)
8206{
8207  rtx insn;
8208  HOST_WIDE_INT address = 0;
8209  Mfix * fix;
8210
8211  minipool_fix_head = minipool_fix_tail = NULL;
8212
8213  /* The first insn must always be a note, or the code below won't
8214     scan it properly.  */
8215  insn = get_insns ();
8216  gcc_assert (GET_CODE (insn) == NOTE);
8217  minipool_pad = 0;
8218
8219  /* Scan all the insns and record the operands that will need fixing.  */
8220  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
8221    {
8222      if (TARGET_CIRRUS_FIX_INVALID_INSNS
8223          && (arm_cirrus_insn_p (insn)
8224	      || GET_CODE (insn) == JUMP_INSN
8225	      || arm_memory_load_p (insn)))
8226	cirrus_reorg (insn);
8227
8228      if (GET_CODE (insn) == BARRIER)
8229	push_minipool_barrier (insn, address);
8230      else if (INSN_P (insn))
8231	{
8232	  rtx table;
8233
8234	  note_invalid_constants (insn, address, true);
8235	  address += get_attr_length (insn);
8236
8237	  /* If the insn is a vector jump, add the size of the table
8238	     and skip the table.  */
8239	  if ((table = is_jump_table (insn)) != NULL)
8240	    {
8241	      address += get_jump_table_size (table);
8242	      insn = table;
8243	    }
8244	}
8245    }
8246
8247  fix = minipool_fix_head;
8248
8249  /* Now scan the fixups and perform the required changes.  */
8250  while (fix)
8251    {
8252      Mfix * ftmp;
8253      Mfix * fdel;
8254      Mfix *  last_added_fix;
8255      Mfix * last_barrier = NULL;
8256      Mfix * this_fix;
8257
8258      /* Skip any further barriers before the next fix.  */
8259      while (fix && GET_CODE (fix->insn) == BARRIER)
8260	fix = fix->next;
8261
8262      /* No more fixes.  */
8263      if (fix == NULL)
8264	break;
8265
8266      last_added_fix = NULL;
8267
8268      for (ftmp = fix; ftmp; ftmp = ftmp->next)
8269	{
8270	  if (GET_CODE (ftmp->insn) == BARRIER)
8271	    {
8272	      if (ftmp->address >= minipool_vector_head->max_address)
8273		break;
8274
8275	      last_barrier = ftmp;
8276	    }
8277	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
8278	    break;
8279
8280	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
8281	}
8282
8283      /* If we found a barrier, drop back to that; any fixes that we
8284	 could have reached but come after the barrier will now go in
8285	 the next mini-pool.  */
8286      if (last_barrier != NULL)
8287	{
8288	  /* Reduce the refcount for those fixes that won't go into this
8289	     pool after all.  */
8290	  for (fdel = last_barrier->next;
8291	       fdel && fdel != ftmp;
8292	       fdel = fdel->next)
8293	    {
8294	      fdel->minipool->refcount--;
8295	      fdel->minipool = NULL;
8296	    }
8297
8298	  ftmp = last_barrier;
8299	}
8300      else
8301        {
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
8304	     new barrier in the code somewhere between the previous
8305	     fix and this one, and arrange to jump around it.  */
8306	  HOST_WIDE_INT max_address;
8307
8308	  /* The last item on the list of fixes must be a barrier, so
8309	     we can never run off the end of the list of fixes without
8310	     last_barrier being set.  */
8311	  gcc_assert (ftmp);
8312
8313	  max_address = minipool_vector_head->max_address;
8314	  /* Check that there isn't another fix that is in range that
8315	     we couldn't fit into this pool because the pool was
8316	     already too large: we need to put the pool before such an
8317	     instruction.  The pool itself may come just after the
8318	     fix because create_fix_barrier also allows space for a
8319	     jump instruction.  */
8320	  if (ftmp->address < max_address)
8321	    max_address = ftmp->address + 1;
8322
8323	  last_barrier = create_fix_barrier (last_added_fix, max_address);
8324	}
8325
8326      assign_minipool_offsets (last_barrier);
8327
8328      while (ftmp)
8329	{
8330	  if (GET_CODE (ftmp->insn) != BARRIER
8331	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
8332		  == NULL))
8333	    break;
8334
8335	  ftmp = ftmp->next;
8336	}
8337
8338      /* Scan over the fixes we have identified for this pool, fixing them
8339	 up and adding the constants to the pool itself.  */
8340      for (this_fix = fix; this_fix && ftmp != this_fix;
8341	   this_fix = this_fix->next)
8342	if (GET_CODE (this_fix->insn) != BARRIER)
8343	  {
8344	    rtx addr
8345	      = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
8346						  minipool_vector_label),
8347			       this_fix->minipool->offset);
8348	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
8349	  }
8350
8351      dump_minipool (last_barrier->insn);
8352      fix = ftmp;
8353    }
8354
8355  /* From now on we must synthesize any constants that we can't handle
8356     directly.  This can happen if the RTL gets split during final
8357     instruction generation.  */
8358  after_arm_reorg = 1;
8359
8360  /* Free the minipool memory.  */
8361  obstack_free (&minipool_obstack, minipool_startobj);
8362}
8363
8364/* Routines to output assembly language.  */
8365
/* If the rtx is one of the valid FPA immediate constants, return the
   string representation of the number.  In this way we can ensure that
   valid double constants are generated even when cross compiling.  */
8369const char *
8370fp_immediate_constant (rtx x)
8371{
8372  REAL_VALUE_TYPE r;
8373  int i;
8374
8375  if (!fp_consts_inited)
8376    init_fp_table ();
8377
8378  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8379  for (i = 0; i < 8; i++)
8380    if (REAL_VALUES_EQUAL (r, values_fp[i]))
8381      return strings_fp[i];
8382
8383  gcc_unreachable ();
8384}
8385
8386/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
8387static const char *
8388fp_const_from_val (REAL_VALUE_TYPE *r)
8389{
8390  int i;
8391
8392  if (!fp_consts_inited)
8393    init_fp_table ();
8394
8395  for (i = 0; i < 8; i++)
8396    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
8397      return strings_fp[i];
8398
8399  gcc_unreachable ();
8400}
8401
8402/* Output the operands of a LDM/STM instruction to STREAM.
8403   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer.
8405   INSTR is the possibly suffixed load or store instruction.  */
8406
8407static void
8408print_multi_reg (FILE *stream, const char *instr, unsigned reg,
8409		 unsigned long mask)
8410{
8411  unsigned i;
8412  bool not_first = FALSE;
8413
8414  fputc ('\t', stream);
8415  asm_fprintf (stream, instr, reg);
8416  fputs (", {", stream);
8417
8418  for (i = 0; i <= LAST_ARM_REGNUM; i++)
8419    if (mask & (1 << i))
8420      {
8421	if (not_first)
8422	  fprintf (stream, ", ");
8423
8424	asm_fprintf (stream, "%r", i);
8425	not_first = TRUE;
8426      }
8427
8428  fprintf (stream, "}\n");
8429}
8430
8431
8432/* Output a FLDMX instruction to STREAM.
   BASE is the register containing the address.
8434   REG and COUNT specify the register range.
8435   Extra registers may be added to avoid hardware bugs.  */
8436
8437static void
8438arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
8439{
8440  int i;
8441
8442  /* Workaround ARM10 VFPr1 bug.  */
8443  if (count == 2 && !arm_arch6)
8444    {
8445      if (reg == 15)
8446	reg--;
8447      count++;
8448    }
8449
8450  fputc ('\t', stream);
8451  asm_fprintf (stream, "fldmfdx\t%r!, {", base);
8452
8453  for (i = reg; i < reg + count; i++)
8454    {
8455      if (i > reg)
8456	fputs (", ", stream);
8457      asm_fprintf (stream, "d%d", i);
8458    }
8459  fputs ("}\n", stream);
8460
8461}
8462
8463
8464/* Output the assembly for a store multiple.  */
8465
8466const char *
8467vfp_output_fstmx (rtx * operands)
8468{
8469  char pattern[100];
8470  int p;
8471  int base;
8472  int i;
8473
8474  strcpy (pattern, "fstmfdx\t%m0!, {%P1");
8475  p = strlen (pattern);
8476
8477  gcc_assert (GET_CODE (operands[1]) == REG);
8478
8479  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
8480  for (i = 1; i < XVECLEN (operands[2], 0); i++)
8481    {
8482      p += sprintf (&pattern[p], ", d%d", base + i);
8483    }
8484  strcpy (&pattern[p], "}");
8485
8486  output_asm_insn (pattern, operands);
8487  return "";
8488}
8489
8490
8491/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
8492   number of bytes pushed.  */
8493
8494static int
8495vfp_emit_fstmx (int base_reg, int count)
8496{
8497  rtx par;
8498  rtx dwarf;
8499  rtx tmp, reg;
8500  int i;
8501
8502  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
8503     register pairs are stored by a store multiple insn.  We avoid this
8504     by pushing an extra pair.  */
8505  if (count == 2 && !arm_arch6)
8506    {
8507      if (base_reg == LAST_VFP_REGNUM - 3)
8508	base_reg -= 2;
8509      count++;
8510    }
8511
8512  /* ??? The frame layout is implementation defined.  We describe
8513     standard format 1 (equivalent to a FSTMD insn and unused pad word).
8514     We really need some way of representing the whole block so that the
8515     unwinder can figure it out at runtime.  */
8516  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8517  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8518
8519  reg = gen_rtx_REG (DFmode, base_reg);
8520  base_reg += 2;
8521
8522  XVECEXP (par, 0, 0)
8523    = gen_rtx_SET (VOIDmode,
8524		   gen_frame_mem (BLKmode,
8525				  gen_rtx_PRE_DEC (BLKmode,
8526						   stack_pointer_rtx)),
8527		   gen_rtx_UNSPEC (BLKmode,
8528				   gen_rtvec (1, reg),
8529				   UNSPEC_PUSH_MULT));
8530
8531  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8532		     plus_constant (stack_pointer_rtx, -(count * 8 + 4)));
8533  RTX_FRAME_RELATED_P (tmp) = 1;
8534  XVECEXP (dwarf, 0, 0) = tmp;
8535
8536  tmp = gen_rtx_SET (VOIDmode,
8537		     gen_frame_mem (DFmode, stack_pointer_rtx),
8538		     reg);
8539  RTX_FRAME_RELATED_P (tmp) = 1;
8540  XVECEXP (dwarf, 0, 1) = tmp;
8541
8542  for (i = 1; i < count; i++)
8543    {
8544      reg = gen_rtx_REG (DFmode, base_reg);
8545      base_reg += 2;
8546      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8547
8548      tmp = gen_rtx_SET (VOIDmode,
8549			 gen_frame_mem (DFmode,
8550					plus_constant (stack_pointer_rtx,
8551						       i * 8)),
8552			 reg);
8553      RTX_FRAME_RELATED_P (tmp) = 1;
8554      XVECEXP (dwarf, 0, i + 1) = tmp;
8555    }
8556
8557  par = emit_insn (par);
8558  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8559				       REG_NOTES (par));
8560  RTX_FRAME_RELATED_P (par) = 1;
8561
8562  return count * 8 + 4;
8563}
8564
8565
8566/* Output a 'call' insn.  */
8567const char *
8568output_call (rtx *operands)
8569{
8570  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
8571
  /* Handle calls to lr using ip (which may be clobbered in the called
     function anyway).  */
8573  if (REGNO (operands[0]) == LR_REGNUM)
8574    {
8575      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8576      output_asm_insn ("mov%?\t%0, %|lr", operands);
8577    }
8578
8579  output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8580
8581  if (TARGET_INTERWORK || arm_arch4t)
8582    output_asm_insn ("bx%?\t%0", operands);
8583  else
8584    output_asm_insn ("mov%?\t%|pc, %0", operands);
8585
8586  return "";
8587}
8588
8589/* Output a 'call' insn that is a reference in memory.  */
8590const char *
8591output_call_mem (rtx *operands)
8592{
8593  if (TARGET_INTERWORK && !arm_arch5)
8594    {
8595      output_asm_insn ("ldr%?\t%|ip, %0", operands);
8596      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8597      output_asm_insn ("bx%?\t%|ip", operands);
8598    }
8599  else if (regno_use_in (LR_REGNUM, operands[0]))
8600    {
8601      /* LR is used in the memory address.  We load the address in the
8602	 first instruction.  It's safe to use IP as the target of the
8603	 load since the call will kill it anyway.  */
8604      output_asm_insn ("ldr%?\t%|ip, %0", operands);
8605      if (arm_arch5)
8606	output_asm_insn ("blx%?\t%|ip", operands);
8607      else
8608	{
8609	  output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8610	  if (arm_arch4t)
8611	    output_asm_insn ("bx%?\t%|ip", operands);
8612	  else
8613	    output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8614	}
8615    }
8616  else
8617    {
8618      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8619      output_asm_insn ("ldr%?\t%|pc, %0", operands);
8620    }
8621
8622  return "";
8623}
8624
8625
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
8629const char *
8630output_mov_long_double_fpa_from_arm (rtx *operands)
8631{
8632  int arm_reg0 = REGNO (operands[1]);
8633  rtx ops[3];
8634
8635  gcc_assert (arm_reg0 != IP_REGNUM);
8636
8637  ops[0] = gen_rtx_REG (SImode, arm_reg0);
8638  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8639  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8640
8641  output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8642  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8643
8644  return "";
8645}
8646
8647/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
8649   OPERANDS[1] is an fpa register.  */
8650const char *
8651output_mov_long_double_arm_from_fpa (rtx *operands)
8652{
8653  int arm_reg0 = REGNO (operands[0]);
8654  rtx ops[3];
8655
8656  gcc_assert (arm_reg0 != IP_REGNUM);
8657
8658  ops[0] = gen_rtx_REG (SImode, arm_reg0);
8659  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8660  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8661
8662  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8663  output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8664  return "";
8665}
8666
/* Output a move from arm registers to arm registers of a long double.
8668   OPERANDS[0] is the destination.
8669   OPERANDS[1] is the source.  */
8670const char *
8671output_mov_long_double_arm_from_arm (rtx *operands)
8672{
8673  /* We have to be careful here because the two might overlap.  */
8674  int dest_start = REGNO (operands[0]);
8675  int src_start = REGNO (operands[1]);
8676  rtx ops[2];
8677  int i;
8678
8679  if (dest_start < src_start)
8680    {
8681      for (i = 0; i < 3; i++)
8682	{
8683	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
8684	  ops[1] = gen_rtx_REG (SImode, src_start + i);
8685	  output_asm_insn ("mov%?\t%0, %1", ops);
8686	}
8687    }
8688  else
8689    {
8690      for (i = 2; i >= 0; i--)
8691	{
8692	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
8693	  ops[1] = gen_rtx_REG (SImode, src_start + i);
8694	  output_asm_insn ("mov%?\t%0, %1", ops);
8695	}
8696    }
8697
8698  return "";
8699}
8700
8701
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
8705const char *
8706output_mov_double_fpa_from_arm (rtx *operands)
8707{
8708  int arm_reg0 = REGNO (operands[1]);
8709  rtx ops[2];
8710
8711  gcc_assert (arm_reg0 != IP_REGNUM);
8712
8713  ops[0] = gen_rtx_REG (SImode, arm_reg0);
8714  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8715  output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8716  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8717  return "";
8718}
8719
8720/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
8722   OPERANDS[1] is an fpa register.  */
8723const char *
8724output_mov_double_arm_from_fpa (rtx *operands)
8725{
8726  int arm_reg0 = REGNO (operands[0]);
8727  rtx ops[2];
8728
8729  gcc_assert (arm_reg0 != IP_REGNUM);
8730
8731  ops[0] = gen_rtx_REG (SImode, arm_reg0);
8732  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8733  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8734  output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8735  return "";
8736}
8737
8738/* Output a move between double words.
8739   It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8740   or MEM<-REG and all MEMs must be offsettable addresses.  */
8741const char *
8742output_move_double (rtx *operands)
8743{
8744  enum rtx_code code0 = GET_CODE (operands[0]);
8745  enum rtx_code code1 = GET_CODE (operands[1]);
8746  rtx otherops[3];
8747
8748  if (code0 == REG)
8749    {
8750      int reg0 = REGNO (operands[0]);
8751
8752      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8753
8754      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
8755
8756      switch (GET_CODE (XEXP (operands[1], 0)))
8757	{
8758	case REG:
8759	  output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8760	  break;
8761
8762	case PRE_INC:
8763	  gcc_assert (TARGET_LDRD);
8764	  output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8765	  break;
8766
8767	case PRE_DEC:
8768	  output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8769	  break;
8770
8771	case POST_INC:
8772	  output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8773	  break;
8774
8775	case POST_DEC:
8776	  gcc_assert (TARGET_LDRD);
8777	  output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8778	  break;
8779
8780	case PRE_MODIFY:
8781	case POST_MODIFY:
8782	  otherops[0] = operands[0];
8783	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8784	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8785
8786	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8787	    {
8788	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8789		{
8790		  /* Registers overlap so split out the increment.  */
8791		  output_asm_insn ("add%?\t%1, %1, %2", otherops);
8792		  output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8793		}
8794	      else
8795		{
		  /* IWMMXT allows offsets larger than ldrd can handle;
		     fix these up with a pair of ldr instructions.  */
8798		  if (GET_CODE (otherops[2]) == CONST_INT
8799		      && (INTVAL(otherops[2]) <= -256
8800			  || INTVAL(otherops[2]) >= 256))
8801		    {
8802		      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
8803		      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8804		      output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
8805		    }
8806		  else
8807		    output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8808		}
8809	    }
8810	  else
8811	    {
	      /* IWMMXT allows offsets larger than ldrd can handle;
		 fix these up with a pair of ldr instructions.  */
8814	      if (GET_CODE (otherops[2]) == CONST_INT
8815		  && (INTVAL(otherops[2]) <= -256
8816		      || INTVAL(otherops[2]) >= 256))
8817		{
8818		  otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8819		  output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
8820		  otherops[0] = operands[0];
8821		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
8822		}
8823	      else
8824		/* We only allow constant increments, so this is safe.  */
8825		output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8826	    }
8827	  break;
8828
8829	case LABEL_REF:
8830	case CONST:
8831	  output_asm_insn ("adr%?\t%0, %1", operands);
8832	  output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8833	  break;
8834
8835	default:
8836	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8837			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8838	    {
8839	      otherops[0] = operands[0];
8840	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8841	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8842
8843	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8844		{
8845		  if (GET_CODE (otherops[2]) == CONST_INT)
8846		    {
8847		      switch ((int) INTVAL (otherops[2]))
8848			{
8849			case -8:
8850			  output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8851			  return "";
8852			case -4:
8853			  output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8854			  return "";
8855			case 4:
8856			  output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8857			  return "";
8858			}
8859		    }
8860		  if (TARGET_LDRD
8861		      && (GET_CODE (otherops[2]) == REG
8862			  || (GET_CODE (otherops[2]) == CONST_INT
8863			      && INTVAL (otherops[2]) > -256
8864			      && INTVAL (otherops[2]) < 256)))
8865		    {
8866		      if (reg_overlap_mentioned_p (otherops[0],
8867						   otherops[2]))
8868			{
8869			  /* Swap base and index registers over to
8870			     avoid a conflict.  */
8871			  otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8872			  otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8873			}
8874		      /* If both registers conflict, it will usually
8875			 have been fixed by a splitter.  */
8876		      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8877			{
8878			  output_asm_insn ("add%?\t%1, %1, %2", otherops);
8879			  output_asm_insn ("ldr%?d\t%0, [%1]",
8880					   otherops);
8881			}
8882		      else
8883			output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8884		      return "";
8885		    }
8886
8887		  if (GET_CODE (otherops[2]) == CONST_INT)
8888		    {
8889		      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8890			output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8891		      else
8892			output_asm_insn ("add%?\t%0, %1, %2", otherops);
8893		    }
8894		  else
8895		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
8896		}
8897	      else
8898		output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8899
8900	      return "ldm%?ia\t%0, %M0";
8901	    }
8902	  else
8903	    {
8904	      otherops[1] = adjust_address (operands[1], SImode, 4);
8905	      /* Take care of overlapping base/data reg.  */
8906	      if (reg_mentioned_p (operands[0], operands[1]))
8907		{
8908		  output_asm_insn ("ldr%?\t%0, %1", otherops);
8909		  output_asm_insn ("ldr%?\t%0, %1", operands);
8910		}
8911	      else
8912		{
8913		  output_asm_insn ("ldr%?\t%0, %1", operands);
8914		  output_asm_insn ("ldr%?\t%0, %1", otherops);
8915		}
8916	    }
8917	}
8918    }
8919  else
8920    {
8921      /* Constraints should ensure this.  */
8922      gcc_assert (code0 == MEM && code1 == REG);
8923      gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8924
8925      switch (GET_CODE (XEXP (operands[0], 0)))
8926        {
8927	case REG:
8928	  output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8929	  break;
8930
8931        case PRE_INC:
8932	  gcc_assert (TARGET_LDRD);
8933	  output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8934	  break;
8935
8936        case PRE_DEC:
8937	  output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8938	  break;
8939
8940        case POST_INC:
8941	  output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8942	  break;
8943
8944        case POST_DEC:
8945	  gcc_assert (TARGET_LDRD);
8946	  output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8947	  break;
8948
8949	case PRE_MODIFY:
8950	case POST_MODIFY:
8951	  otherops[0] = operands[1];
8952	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8953	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (GET_CODE (otherops[2]) == CONST_INT
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      rtx reg1;
	      reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		  otherops[0] = reg1;
		  output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
		}
	      else
		{
		  otherops[0] = reg1;
		  output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
		  otherops[0] = operands[1];
		  output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
	  else
	    output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (GET_CODE (otherops[2]) == CONST_INT)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  output_asm_insn ("stm%?db\t%m0, %M1", operands);
		  return "";

		case -4:
		  output_asm_insn ("stm%?da\t%m0, %M1", operands);
		  return "";

		case 4:
		  output_asm_insn ("stm%?ib\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (GET_CODE (otherops[2]) == REG
		  || (GET_CODE (otherops[2]) == CONST_INT
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

        default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
	  output_asm_insn ("str%?\t%1, %0", operands);
	  output_asm_insn ("str%?\t%1, %0", otherops);
	}
    }

  return "";
}
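
/* As an illustration only (assuming operand registers r0/r1 and base
   register r4, which are hypothetical here): a DImode load such as
   (set (reg:DI r0) (mem:DI (reg:SI r4))) takes the REG case above and
   emits

	ldmia	r4, {r0, r1}

   while a POST_INC address emits "ldmia r4!, {r0, r1}" and, on
   TARGET_LDRD cores, a PRE_INC address emits "ldrd r0, [r4, #8]!".  */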

/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
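
/* Worked example (illustrative; r0/r1 are hypothetical operands): for
   n = 0x12340078 the loop above peels off one ARM-encodable 8-bit
   chunk at a time, emitting

	add	r0, r1, #120		@ 0x00000078
	add	r0, r0, #36962304	@ 0x02340000
	add	r0, r0, #268435456	@ 0x10000000

   Each chunk is an 8-bit value at an even bit position, which is
   exactly the form of immediate that const_ok_for_arm accepts.  */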

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
   constant amount of the shift otherwise.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ASHIFT:
      mnem = "asl";
      break;

    case ASHIFTRT:
      mnem = "asr";
      break;

    case LSHIFTRT:
      mnem = "lsr";
      break;

    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;

      /* Fall through.  */

    case ROTATERT:
      mnem = "ror";
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return "asl";

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
	 multiplication by a power of 2 with the recognizer for a
	 shift.  >=32 is not a valid shift for "asl", so we must try and
	 output a shift that produces the correct arithmetical result.
	 Using lsr #32 is identical except for the fact that the carry bit
	 is not set correctly if we set the flags; but we never use the
	 carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
	/* Rotate is just modulo 32.  */
	*amountp &= 31;
      else if (*amountp != (*amountp & 31))
	{
	  if (code == ASHIFT)
	    mnem = "lsr";
	  *amountp = 32;
	}

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
	return NULL;
    }

  return mnem;
}
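
/* Illustrative examples: a MULT by 8 comes out as "asl" with
   *AMOUNTP == 3; (rotate x 8) becomes "ror" with *AMOUNTP == 24; an
   ASHIFT by 33 is canonicalized to "lsr" with *AMOUNTP == 32, which
   gives the arithmetically correct (zero) result as described above;
   and a constant shift of 0 returns NULL since it is a no-op.  */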

/* Obtain the shift count from POWER, which must be a power of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}

/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
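
/* For example, the three bytes 'H', 'i', '\n' are emitted as

	.ascii	"Hi\012"

   with the newline escaped in octal, and a fresh .ascii directive is
   started whenever a chunk reaches MAX_ASCII_LEN characters.  */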

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (regs_ever_live[reg]
	    || (! current_function_is_leaf && call_used_regs [reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && current_function_uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 10; reg++)
	if (regs_ever_live[reg] && ! call_used_regs [reg])
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (! TARGET_APCS_FRAME
	  && ! frame_pointer_needed
	  && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
	  && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
	      || current_function_uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }

  /* Save registers so the exception handler can modify them.  */
  if (current_function_calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}

/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (frame_pointer_needed)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  /* Volatile functions do not return, so there
     is no need to save any other registers.  */
  if (IS_VOLATILE (func_type))
    return save_reg_mask;

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (regs_ever_live [LR_REGNUM]
	  || (save_reg_mask
	      && optimize_size
	      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	      && !current_function_calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
    {
      unsigned int reg;

      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  return save_reg_mask;
}


/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (regs_ever_live[reg] && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && current_function_uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);

      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }

  return mask;
}
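
/* Illustrative case (hypothetical function): a Thumb function that
   uses r4 and r5 and makes a call gets mask 0x4030 -- r4 (bit 4),
   r5 (bit 5) and LR (bit 14).  LR is pushed both because the call
   makes it live and because, as noted above, pushing any lo reg
   pulls LR into the push list.  */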


/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!regs_ever_live[regno] || call_used_regs[regno])
	      && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8 + 4;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8 + 4;
	}
    }
  return saved;
}
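
/* Example (hypothetical register usage): with four consecutive
   call-saved double registers live, the loop above finds one run with
   count == 4, giving 4 * 8 + 4 == 36 bytes; the extra word is the
   format word stored by fstmx/fldmx.  On pre-v6 cores a run of
   exactly two registers is padded to three (28 bytes) to work around
   the ARM10 VFPr1 bug.  */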


/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  REVERSE nonzero means
   predicate the output on the inverse of the condition in OPERAND.  */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!current_function_calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  return_used_this_function = 1;

  live_regs_mask = arm_compute_save_reg_mask ();

  if (live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking, or ISR) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && ! TARGET_INTERWORK)
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      offsets = arm_get_frame_offsets ();
	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5)
		sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;
		  sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  really_return = 0;
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
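
/* Typical output (illustrative), for an unconditional return from a
   normal function that pushed r4, r5 and LR:

	ldmfd	sp!, {r4, r5, pc}

   i.e. the saved LR is loaded straight into the PC, so no separate
   return instruction is needed.  An ARM_FT_INTERWORKED function
   instead reloads LR and returns with a separate "bx lr".  */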

/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx           x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}

/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (!TARGET_ARM)
    {
      thumb_output_function_prologue (f, frame_size);
      return;
    }

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       current_function_args_size,
	       current_function_pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (current_function_calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");

#ifdef AOF_ASSEMBLER
  if (flag_pic)
    asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
#endif

  return_used_this_function = 0;
}
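
/* The resulting assembler commentary looks something like:

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.

   (shown here for a hypothetical frame; the exact lines depend on the
   function type and the flags tested above).  */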

const char *
arm_output_epilogue (rtx sibling)
{
  int reg;
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  rtx operands[3];
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  int start_reg;
  arm_stack_offsets *offsets;

  /* If we have already generated the return instruction
     then it is futile to generate anything else.  */
  if (use_return_insn (FALSE, sibling) && return_used_this_function)
    return "";

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      rtx op;

      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

      return "";
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!current_function_calls_eh_return || really_return);

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = arm_compute_save_reg_mask ();

  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);

  floats_offset = offsets->saved_args;
  /* Compute how far away the floats will be.  */
  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
    if (saved_regs_mask & (1 << reg))
      floats_offset += 4;

  if (frame_pointer_needed)
    {
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
      int vfp_offset = offsets->frame;

      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
	{
	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    if (regs_ever_live[reg] && !call_used_regs[reg])
	      {
		floats_offset += 12;
		asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, floats_offset - vfp_offset);
	      }
	}
      else
	{
	  start_reg = LAST_FPA_REGNUM;

	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    {
	      if (regs_ever_live[reg] && !call_used_regs[reg])
		{
		  floats_offset += 12;

		  /* We can't unstack more than four registers at once.  */
		  if (start_reg - reg == 3)
		    {
		      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
			           reg, FP_REGNUM, floats_offset - vfp_offset);
		      start_reg = reg - 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
				 reg + 1, start_reg - reg,
				 FP_REGNUM, floats_offset - vfp_offset);
		  start_reg = reg - 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
			 reg + 1, start_reg - reg,
			 FP_REGNUM, floats_offset - vfp_offset);
	}

      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  int saved_size;

	  /* The fldmx insn does not have base+offset addressing modes,
	     so we use IP to hold the address.  */
	  saved_size = arm_get_vfp_saved_size ();

	  if (saved_size > 0)
	    {
	      floats_offset += saved_size;
	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
			   FP_REGNUM, floats_offset - vfp_offset);
	    }
	  start_reg = FIRST_VFP_REGNUM;
	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	    {
	      if ((!regs_ever_live[reg] || call_used_regs[reg])
		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
		{
		  if (start_reg != reg)
		    arm_output_fldmx (f, IP_REGNUM,
				      (start_reg - FIRST_VFP_REGNUM) / 2,
				      (reg - start_reg) / 2);
		  start_reg = reg + 2;
		}
	    }
	  if (start_reg != reg)
	    arm_output_fldmx (f, IP_REGNUM,
			      (start_reg - FIRST_VFP_REGNUM) / 2,
			      (reg - start_reg) / 2);
	}

      if (TARGET_IWMMXT)
	{
	  /* The frame pointer is guaranteed to be non-double-word aligned.
	     This is because it is set to (old_stack_pointer - 4) and the
	     old_stack_pointer was double word aligned.  Thus the offset to
	     the iWMMXt registers to be loaded must also be non-double-word
	     sized, so that the resultant address *is* double-word aligned.
	     We can ignore floats_offset since that was already included in
	     the live_regs_mask.  */
	  lrm_count += (lrm_count % 2 ? 2 : 1);

	  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
	    if (regs_ever_live[reg] && !call_used_regs[reg])
	      {
		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, lrm_count * 4);
		lrm_count += 2;
	      }
	}

      /* saved_regs_mask should contain the IP, which at the time of stack
	 frame generation actually contains the old stack pointer.  So a
	 quick way to unwind the stack is just pop the IP register directly
	 into the stack pointer.  */
      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
      saved_regs_mask &= ~ (1 << IP_REGNUM);
      saved_regs_mask |=   (1 << SP_REGNUM);

      /* There are two registers left in saved_regs_mask - LR and PC.  We
	 only need to restore the LR register (the return address), but to
	 save time we can load it directly into the PC, unless we need a
	 special function exit sequence, or we are not really returning.  */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !current_function_calls_eh_return)
	/* Delete the LR from the register mask, so that the LR on
	   the stack is loaded into the PC in the register mask.  */
	saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);

      /* We must use SP as the base register, because SP is one of the
         registers being restored.  If an interrupt or page fault
         happens in the ldm instruction, the SP might or might not
         have been restored.  That would be bad, as then SP will no
         longer indicate the safe area of stack, and we can get stack
         corruption.  Using SP as the base register means that it will
         be reset correctly to the original value, should an interrupt
         occur.  If the stack pointer already points at the right
         place, then omit the subtraction.  */
      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
	  || current_function_calls_alloca)
	asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
		     4 * bit_count (saved_regs_mask));
      print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);

      if (IS_INTERRUPT (func_type))
	/* Interrupt handlers will have pushed the
	   IP onto the stack, so restore it now.  */
	print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
    }
  else
    {
      /* Restore stack pointer if necessary.  */
      if (offsets->outgoing_args != offsets->saved_regs)
	{
	  operands[0] = operands[1] = stack_pointer_rtx;
	  operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
	  output_add_immediate (operands);
	}

      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
	{
	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    if (regs_ever_live[reg] && !call_used_regs[reg])
	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			   reg, SP_REGNUM);
	}
      else
	{
	  start_reg = FIRST_FPA_REGNUM;

	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    {
	      if (regs_ever_live[reg] && !call_used_regs[reg])
		{
		  if (reg - start_reg == 3)
		    {
		      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
				   start_reg, SP_REGNUM);
		      start_reg = reg + 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
				 start_reg, reg - start_reg,
				 SP_REGNUM);

		  start_reg = reg + 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
			 start_reg, reg - start_reg, SP_REGNUM);
	}

      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  start_reg = FIRST_VFP_REGNUM;
	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	    {
	      if ((!regs_ever_live[reg] || call_used_regs[reg])
		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
		{
		  if (start_reg != reg)
		    arm_output_fldmx (f, SP_REGNUM,
				      (start_reg - FIRST_VFP_REGNUM) / 2,
				      (reg - start_reg) / 2);
		  start_reg = reg + 2;
		}
	    }
	  if (start_reg != reg)
	    arm_output_fldmx (f, SP_REGNUM,
			      (start_reg - FIRST_VFP_REGNUM) / 2,
			      (reg - start_reg) / 2);
	}
      if (TARGET_IWMMXT)
	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
	  if (regs_ever_live[reg] && !call_used_regs[reg])
	    asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);

      /* If we can, restore the LR into the PC.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && really_return
	  && current_function_pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !current_function_calls_eh_return)
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |=   (1 << PC_REGNUM);
	}

      /* Load the registers off the stack.  If we only have one register
	 to load use the LDR instruction - it is faster.  */
      if (saved_regs_mask == (1 << LR_REGNUM))
	{
	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
	}
      else if (saved_regs_mask)
	{
	  if (saved_regs_mask & (1 << SP_REGNUM))
	    /* Note - write back to the stack register is not enabled
	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
	       in the list of registers and if we add writeback the
	       instruction becomes UNPREDICTABLE.  */
	    print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
	  else
	    print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
	}

      if (current_function_pretend_args_size)
	{
	  /* Unwind the pre-pushed regs.  */
	  operands[0] = operands[1] = stack_pointer_rtx;
	  operands[2] = GEN_INT (current_function_pretend_args_size);
	  output_add_immediate (operands);
	}
    }

  /* We may have already restored PC directly from the stack.  */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";

  /* Stack adjustment for exception handler.  */
  if (current_function_calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
		 ARM_EH_STACKADJ_REGNUM);

  /* Generate the return instruction.  */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (arm_arch5 || arm_arch4t)
	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}

static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      return_used_this_function = 0;
    }
  else
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || !return_used_this_function
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}

/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.  The insn looks
     something like this:

       (parallel [
           (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI 11 fp))
           (use (reg:SI 12 ip))
           (use (reg:SI 14 lr))
           (use (reg:SI 15 pc))
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
        ])

      This sequence is used both by the code to support stack unwinding for
      exception handlers and the code to generate dwarf2 frame debugging.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem (BLKmode,
					  gen_rtx_PRE_DEC (BLKmode,
							   stack_pointer_rtx)),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (i != PC_REGNUM)
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem (SImode,
					      plus_constant (stack_pointer_rtx,
							     4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
				       REG_NOTES (par));
  return par;
}
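
/* Usage sketch (hypothetical mask): emit_multi_reg_push (0x4810)
   pushes r4, fp and lr, emitting one push_multi insn whose unwinding
   note records a single 12-byte SP decrement plus one store per
   register, as laid out in the comment above.  Callers such as
   arm_expand_prologue pass the computed save-register mask.  */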

/* Calculate the size of the return value that is passed in registers.  */
static int
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (current_function_return_rtx != 0)
    mode = GET_MODE (current_function_return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}

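/* Emit the insn pushing COUNT FPA registers, starting at BASE_REG,
   with a single "sfmfd"-style store-multiple.  As with
   emit_multi_reg_push, the insn is annotated with a
   REG_FRAME_RELATED_EXPR note describing one 12-byte store per
   XFmode register plus a single SP decrement.  */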
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem (BLKmode,
				  gen_rtx_PRE_DEC (BLKmode,
						   stack_pointer_rtx)),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));

  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
				       REG_NOTES (par));
  return par;
}


/* Return true if the current function needs to save/restore LR.  */

static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || regs_ever_live [LR_REGNUM]);
}


/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */


/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  HOST_WIDE_INT frame_size;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     Fortunately, we only need to know about leaf functions once
     reload has completed, and the frame size cannot be changed after
     that time, so we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of the
     preceding data.  */
10511  frame_size = ROUND_UP_WORD (get_frame_size ());
10512
10513  leaf = leaf_function_p ();
10514
10515  /* Space for variadic functions.  */
10516  offsets->saved_args = current_function_pretend_args_size;
10517
10518  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
10519
10520  if (TARGET_ARM)
10521    {
10522      unsigned int regno;
10523
10524      saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10525
10526      /* We know that SP will be doubleword aligned on entry, and we must
10527	 preserve that condition at any subroutine call.  We also require the
10528	 soft frame pointer to be doubleword aligned.  */
10529
10530      if (TARGET_REALLY_IWMMXT)
10531	{
10532	  /* Check for the call-saved iWMMXt registers.  */
10533	  for (regno = FIRST_IWMMXT_REGNUM;
10534	       regno <= LAST_IWMMXT_REGNUM;
10535	       regno++)
10536	    if (regs_ever_live [regno] && ! call_used_regs [regno])
10537	      saved += 8;
10538	}
10539
10540      func_type = arm_current_func_type ();
10541      if (! IS_VOLATILE (func_type))
10542	{
10543	  /* Space for saved FPA registers.  */
10544	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
10545	  if (regs_ever_live[regno] && ! call_used_regs[regno])
10546	    saved += 12;
10547
10548	  /* Space for saved VFP registers.  */
10549	  if (TARGET_HARD_FLOAT && TARGET_VFP)
10550	    saved += arm_get_vfp_saved_size ();
10551	}
10552    }
10553  else /* TARGET_THUMB */
10554    {
10555      saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10556      if (TARGET_BACKTRACE)
10557	saved += 16;
10558    }
10559
10560  /* Saved registers include the stack frame.  */
10561  offsets->saved_regs = offsets->saved_args + saved;
10562  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10563  /* A leaf function does not need any stack alignment if it has nothing
10564     on the stack.  */
10565  if (leaf && frame_size == 0)
10566    {
10567      offsets->outgoing_args = offsets->soft_frame;
10568      offsets->locals_base = offsets->soft_frame;
10569      return offsets;
10570    }
10571
10572  /* Ensure SFP has the correct alignment.  */
10573  if (ARM_DOUBLEWORD_ALIGN
10574      && (offsets->soft_frame & 7))
10575    offsets->soft_frame += 4;
10576
10577  offsets->locals_base = offsets->soft_frame + frame_size;
10578  offsets->outgoing_args = (offsets->locals_base
10579			    + current_function_outgoing_args_size);
10580
10581  if (ARM_DOUBLEWORD_ALIGN)
10582    {
10583      /* Ensure SP remains doubleword aligned.  */
10584      if (offsets->outgoing_args & 7)
10585	offsets->outgoing_args += 4;
10586      gcc_assert (!(offsets->outgoing_args & 7));
10587    }
10588
10589  return offsets;
10590}
10591
10592
10593/* Calculate the relative offsets for the different stack pointers.  Positive
10594   offsets are in the direction of stack growth.  */
10595
10596HOST_WIDE_INT
10597arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10598{
10599  arm_stack_offsets *offsets;
10600
10601  offsets = arm_get_frame_offsets ();
10602
10603  /* OK, now we have enough information to compute the distances.
10604     There must be an entry in these switch tables for each pair
10605     of registers in ELIMINABLE_REGS, even if some of the entries
10606     seem to be redundant or useless.  */
10607  switch (from)
10608    {
10609    case ARG_POINTER_REGNUM:
10610      switch (to)
10611	{
10612	case THUMB_HARD_FRAME_POINTER_REGNUM:
10613	  return 0;
10614
10615	case FRAME_POINTER_REGNUM:
10616	  /* This is the reverse of the soft frame pointer
10617	     to hard frame pointer elimination below.  */
10618	  return offsets->soft_frame - offsets->saved_args;
10619
10620	case ARM_HARD_FRAME_POINTER_REGNUM:
10621	  /* If there is no stack frame then the hard
10622	     frame pointer and the arg pointer coincide.  */
10623	  if (offsets->frame == offsets->saved_regs)
10624	    return 0;
10625	  /* FIXME:  Not sure about this.  Maybe we should always return 0 ?  */
10626	  return (frame_pointer_needed
10627		  && cfun->static_chain_decl != NULL
10628		  && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10629
10630	case STACK_POINTER_REGNUM:
10631	  /* If nothing has been pushed on the stack at all
10632	     then this will return -4.  This *is* correct!  */
10633	  return offsets->outgoing_args - (offsets->saved_args + 4);
10634
10635	default:
10636	  gcc_unreachable ();
10637	}
10638      gcc_unreachable ();
10639
10640    case FRAME_POINTER_REGNUM:
10641      switch (to)
10642	{
10643	case THUMB_HARD_FRAME_POINTER_REGNUM:
10644	  return 0;
10645
10646	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer points to the bottom
	     entry in the stack frame.  If there is no stack frame at
	     all, then they are identical.  */
10651
10652	  return offsets->frame - offsets->soft_frame;
10653
10654	case STACK_POINTER_REGNUM:
10655	  return offsets->outgoing_args - offsets->soft_frame;
10656
10657	default:
10658	  gcc_unreachable ();
10659	}
10660      gcc_unreachable ();
10661
10662    default:
10663      /* You cannot eliminate from the stack pointer.
10664	 In theory you could eliminate from the hard frame
10665	 pointer to the stack pointer, but this will never
10666	 happen, since if a stack frame is not needed the
10667	 hard frame pointer will never be used.  */
10668      gcc_unreachable ();
10669    }
10670}
10671
10672
10673/* Generate the prologue instructions for entry into an ARM function.  */
10674void
10675arm_expand_prologue (void)
10676{
10677  int reg;
10678  rtx amount;
10679  rtx insn;
10680  rtx ip_rtx;
10681  unsigned long live_regs_mask;
10682  unsigned long func_type;
10683  int fp_offset = 0;
10684  int saved_pretend_args = 0;
10685  int saved_regs = 0;
10686  unsigned HOST_WIDE_INT args_to_push;
10687  arm_stack_offsets *offsets;
10688
10689  func_type = arm_current_func_type ();
10690
10691  /* Naked functions don't have prologues.  */
10692  if (IS_NAKED (func_type))
10693    return;
10694
10695  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
10696  args_to_push = current_function_pretend_args_size;
10697
  /* Compute which registers we will have to save onto the stack.  */
10699  live_regs_mask = arm_compute_save_reg_mask ();
10700
10701  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10702
10703  if (frame_pointer_needed)
10704    {
10705      if (IS_INTERRUPT (func_type))
10706	{
10707	  /* Interrupt functions must not corrupt any registers.
10708	     Creating a frame pointer however, corrupts the IP
10709	     register, so we must push it first.  */
10710	  insn = emit_multi_reg_push (1 << IP_REGNUM);
10711
10712	  /* Do not set RTX_FRAME_RELATED_P on this insn.
10713	     The dwarf stack unwinding code only wants to see one
10714	     stack decrement per function, and this is not it.  If
10715	     this instruction is labeled as being part of the frame
10716	     creation sequence then dwarf2out_frame_debug_expr will
10717	     die when it encounters the assignment of IP to FP
10718	     later on, since the use of SP here establishes SP as
10719	     the CFA register and not IP.
10720
10721	     Anyway this instruction is not really part of the stack
10722	     frame creation although it is part of the prologue.  */
10723	}
10724      else if (IS_NESTED (func_type))
10725	{
	  /* The static chain register is the same as the IP register,
	     which is used as a scratch register during stack frame
	     creation.  To get around this we need to find somewhere
	     to store IP whilst the frame is being created.  We try
	     the following places in order:
10731
10732	       1. The last argument register.
10733	       2. A slot on the stack above the frame.  (This only
10734	          works if the function is not a varargs function).
10735	       3. Register r3, after pushing the argument registers
10736	          onto the stack.
10737
10738	     Note - we only need to tell the dwarf2 backend about the SP
10739	     adjustment in the second variant; the static chain register
10740	     doesn't need to be unwound, as it doesn't contain a value
10741	     inherited from the caller.  */
10742
10743	  if (regs_ever_live[3] == 0)
10744	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
10745	  else if (args_to_push == 0)
10746	    {
10747	      rtx dwarf;
10748
10749	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10750	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
10751	      fp_offset = 4;
10752
10753	      /* Just tell the dwarf backend that we adjusted SP.  */
10754	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10755				   plus_constant (stack_pointer_rtx,
10756						  -fp_offset));
10757	      RTX_FRAME_RELATED_P (insn) = 1;
10758	      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10759						    dwarf, REG_NOTES (insn));
10760	    }
10761	  else
10762	    {
10763	      /* Store the args on the stack.  */
10764	      if (cfun->machine->uses_anonymous_args)
10765		insn = emit_multi_reg_push
10766		  ((0xf0 >> (args_to_push / 4)) & 0xf);
10767	      else
10768		insn = emit_insn
10769		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10770			       GEN_INT (- args_to_push)));
10771
10772	      RTX_FRAME_RELATED_P (insn) = 1;
10773
10774	      saved_pretend_args = 1;
10775	      fp_offset = args_to_push;
10776	      args_to_push = 0;
10777
10778	      /* Now reuse r3 to preserve IP.  */
10779	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
10780	    }
10781	}
10782
10783      insn = emit_set_insn (ip_rtx,
10784			    plus_constant (stack_pointer_rtx, fp_offset));
10785      RTX_FRAME_RELATED_P (insn) = 1;
10786    }
10787
10788  if (args_to_push)
10789    {
10790      /* Push the argument registers, or reserve space for them.  */
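      /* An illustrative note: the mask expression below selects the
	 last args_to_push / 4 argument registers.  For example, if
	 args_to_push == 8, then (0xf0 >> 2) & 0xf == 0xc, i.e. the
	 set {r2, r3}.  The same computation is used when storing the
	 args in the nested function case above.  */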
10791      if (cfun->machine->uses_anonymous_args)
10792	insn = emit_multi_reg_push
10793	  ((0xf0 >> (args_to_push / 4)) & 0xf);
10794      else
10795	insn = emit_insn
10796	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10797		       GEN_INT (- args_to_push)));
10798      RTX_FRAME_RELATED_P (insn) = 1;
10799    }
10800
  /* If this is an interrupt service routine, the link register is
     going to be pushed, and we are not creating a stack frame (which
     would involve an extra push of IP and a pop in the epilogue),
     then subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
10806  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10807      && (live_regs_mask & (1 << LR_REGNUM)) != 0
10808      && ! frame_pointer_needed)
10809    {
10810      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
10811
10812      emit_set_insn (lr, plus_constant (lr, -4));
10813    }
10814
10815  if (live_regs_mask)
10816    {
10817      insn = emit_multi_reg_push (live_regs_mask);
10818      saved_regs += bit_count (live_regs_mask) * 4;
10819      RTX_FRAME_RELATED_P (insn) = 1;
10820    }
10821
10822  if (TARGET_IWMMXT)
10823    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10824      if (regs_ever_live[reg] && ! call_used_regs [reg])
10825	{
10826	  insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10827	  insn = gen_frame_mem (V2SImode, insn);
10828	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
10829	  RTX_FRAME_RELATED_P (insn) = 1;
10830	  saved_regs += 8;
10831	}
10832
10833  if (! IS_VOLATILE (func_type))
10834    {
10835      int start_reg;
10836
10837      /* Save any floating point call-saved registers used by this
10838	 function.  */
10839      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10840	{
10841	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10842	    if (regs_ever_live[reg] && !call_used_regs[reg])
10843	      {
10844		insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10845		insn = gen_frame_mem (XFmode, insn);
10846		insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
10847		RTX_FRAME_RELATED_P (insn) = 1;
10848		saved_regs += 12;
10849	      }
10850	}
10851      else
10852	{
10853	  start_reg = LAST_FPA_REGNUM;
10854
10855	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10856	    {
10857	      if (regs_ever_live[reg] && !call_used_regs[reg])
10858		{
10859		  if (start_reg - reg == 3)
10860		    {
10861		      insn = emit_sfm (reg, 4);
10862		      RTX_FRAME_RELATED_P (insn) = 1;
10863		      saved_regs += 48;
10864		      start_reg = reg - 1;
10865		    }
10866		}
10867	      else
10868		{
10869		  if (start_reg != reg)
10870		    {
10871		      insn = emit_sfm (reg + 1, start_reg - reg);
10872		      RTX_FRAME_RELATED_P (insn) = 1;
10873		      saved_regs += (start_reg - reg) * 12;
10874		    }
10875		  start_reg = reg - 1;
10876		}
10877	    }
10878
10879	  if (start_reg != reg)
10880	    {
10881	      insn = emit_sfm (reg + 1, start_reg - reg);
10882	      saved_regs += (start_reg - reg) * 12;
10883	      RTX_FRAME_RELATED_P (insn) = 1;
10884	    }
10885	}
10886      if (TARGET_HARD_FLOAT && TARGET_VFP)
10887	{
10888	  start_reg = FIRST_VFP_REGNUM;
10889
10890 	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10891	    {
10892	      if ((!regs_ever_live[reg] || call_used_regs[reg])
10893		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10894		{
10895		  if (start_reg != reg)
10896		    saved_regs += vfp_emit_fstmx (start_reg,
10897						  (reg - start_reg) / 2);
10898		  start_reg = reg + 2;
10899		}
10900	    }
10901	  if (start_reg != reg)
10902	    saved_regs += vfp_emit_fstmx (start_reg,
10903					  (reg - start_reg) / 2);
10904	}
10905    }
10906
10907  if (frame_pointer_needed)
10908    {
10909      /* Create the new frame pointer.  */
10910      insn = GEN_INT (-(4 + args_to_push + fp_offset));
10911      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10912      RTX_FRAME_RELATED_P (insn) = 1;
10913
10914      if (IS_NESTED (func_type))
10915	{
10916	  /* Recover the static chain register.  */
10917	  if (regs_ever_live [3] == 0
10918	      || saved_pretend_args)
10919	    insn = gen_rtx_REG (SImode, 3);
10920	  else /* if (current_function_pretend_args_size == 0) */
10921	    {
10922	      insn = plus_constant (hard_frame_pointer_rtx, 4);
10923	      insn = gen_frame_mem (SImode, insn);
10924	    }
10925
10926	  emit_set_insn (ip_rtx, insn);
10927	  /* Add a USE to stop propagate_one_insn() from barfing.  */
10928	  emit_insn (gen_prologue_use (ip_rtx));
10929	}
10930    }
10931
10932  offsets = arm_get_frame_offsets ();
10933  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10934    {
10935      /* This add can produce multiple insns for a large constant, so we
10936	 need to get tricky.  */
10937      rtx last = get_last_insn ();
10938
10939      amount = GEN_INT (offsets->saved_args + saved_regs
10940			- offsets->outgoing_args);
10941
10942      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10943				    amount));
10944      do
10945	{
10946	  last = last ? NEXT_INSN (last) : get_insns ();
10947	  RTX_FRAME_RELATED_P (last) = 1;
10948	}
10949      while (last != insn);
10950
10951      /* If the frame pointer is needed, emit a special barrier that
10952	 will prevent the scheduler from moving stores to the frame
10953	 before the stack adjustment.  */
10954      if (frame_pointer_needed)
10955	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10956					 hard_frame_pointer_rtx));
10957    }
10958
10959
10960  if (flag_pic && arm_pic_register != INVALID_REGNUM)
10961    arm_load_pic_register (0UL);
10962
10963  /* If we are profiling, make sure no instructions are scheduled before
10964     the call to mcount.  Similarly if the user has requested no
10965     scheduling in the prolog.  Similarly if we want non-call exceptions
10966     using the EABI unwinder, to prevent faulting instructions from being
10967     swapped with a stack adjustment.  */
10968  if (current_function_profile || !TARGET_SCHED_PROLOG
10969      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
10970    emit_insn (gen_blockage ());
10971
10972  /* If the link register is being kept alive, with the return address in it,
10973     then make sure that it does not get reused by the ce2 pass.  */
10974  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10975    {
10976      emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10977      cfun->machine->lr_save_eliminated = 1;
10978    }
10979}
10980
/* If CODE is 'd', then X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
10994void
10995arm_print_operand (FILE *stream, rtx x, int code)
10996{
10997  switch (code)
10998    {
10999    case '@':
11000      fputs (ASM_COMMENT_START, stream);
11001      return;
11002
11003    case '_':
11004      fputs (user_label_prefix, stream);
11005      return;
11006
11007    case '|':
11008      fputs (REGISTER_PREFIX, stream);
11009      return;
11010
11011    case '?':
11012      if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
11013	{
11014	  if (TARGET_THUMB)
11015	    {
11016	      output_operand_lossage ("predicated Thumb instruction");
11017	      break;
11018	    }
11019	  if (current_insn_predicate != NULL)
11020	    {
11021	      output_operand_lossage
11022		("predicated instruction in conditional sequence");
11023	      break;
11024	    }
11025
11026	  fputs (arm_condition_codes[arm_current_cc], stream);
11027	}
11028      else if (current_insn_predicate)
11029	{
11030	  enum arm_cond_code code;
11031
11032	  if (TARGET_THUMB)
11033	    {
11034	      output_operand_lossage ("predicated Thumb instruction");
11035	      break;
11036	    }
11037
11038	  code = get_arm_condition_code (current_insn_predicate);
11039	  fputs (arm_condition_codes[code], stream);
11040	}
11041      return;
11042
11043    case 'N':
11044      {
11045	REAL_VALUE_TYPE r;
11046	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11047	r = REAL_VALUE_NEGATE (r);
11048	fprintf (stream, "%s", fp_const_from_val (&r));
11049      }
11050      return;
11051
11052    case 'B':
11053      if (GET_CODE (x) == CONST_INT)
11054	{
11055	  HOST_WIDE_INT val;
11056	  val = ARM_SIGN_EXTEND (~INTVAL (x));
11057	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
11058	}
11059      else
11060	{
11061	  putc ('~', stream);
11062	  output_addr_const (stream, x);
11063	}
11064      return;
11065
11066    case 'i':
11067      fprintf (stream, "%s", arithmetic_instr (x, 1));
11068      return;
11069
11070    /* Truncate Cirrus shift counts.  */
11071    case 's':
11072      if (GET_CODE (x) == CONST_INT)
11073	{
11074	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
11075	  return;
11076	}
11077      arm_print_operand (stream, x, 0);
11078      return;
11079
11080    case 'I':
11081      fprintf (stream, "%s", arithmetic_instr (x, 0));
11082      return;
11083
11084    case 'S':
11085      {
11086	HOST_WIDE_INT val;
11087	const char *shift;
11088
11089	if (!shift_operator (x, SImode))
11090	  {
11091	    output_operand_lossage ("invalid shift operand");
11092	    break;
11093	  }
11094
11095	shift = shift_op (x, &val);
11096
11097	if (shift)
11098	  {
11099	    fprintf (stream, ", %s ", shift);
11100	    if (val == -1)
11101	      arm_print_operand (stream, XEXP (x, 1), 0);
11102	    else
11103	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
11104	  }
11105      }
11106      return;
11107
11108      /* An explanation of the 'Q', 'R' and 'H' register operands:
11109
11110	 In a pair of registers containing a DI or DF value the 'Q'
11111	 operand returns the register number of the register containing
11112	 the least significant part of the value.  The 'R' operand returns
11113	 the register number of the register containing the most
11114	 significant part of the value.
11115
	 The 'H' operand returns the higher of the two register numbers.
	 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower numbered register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.
11121
11122	 The purpose of these operands is to distinguish between cases
11123	 where the endian-ness of the values is important (for example
11124	 when they are added together), and cases where the endian-ness
11125	 is irrelevant, but the order of register operations is important.
11126	 For example when loading a value from memory into a register
11127	 pair, the endian-ness does not matter.  Provided that the value
11128	 from the lower memory address is put into the lower numbered
11129	 register, and the value from the higher address is put into the
11130	 higher numbered register, the load will work regardless of whether
11131	 the value being loaded is big-wordian or little-wordian.  The
11132	 order of the two register loads can matter however, if the address
11133	 of the memory location is actually held in one of the registers
11134	 being overwritten by the load.  */
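      /* For example, on a little-endian target (WORDS_BIG_ENDIAN false)
	 with a DImode value held in {r0, r1}: %Q prints r0 (the least
	 significant half), %R prints r1 (the most significant half),
	 and %H prints r1, the higher-numbered register.  */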
11135    case 'Q':
11136      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11137	{
11138	  output_operand_lossage ("invalid operand for code '%c'", code);
11139	  return;
11140	}
11141
11142      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
11143      return;
11144
11145    case 'R':
11146      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11147	{
11148	  output_operand_lossage ("invalid operand for code '%c'", code);
11149	  return;
11150	}
11151
11152      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
11153      return;
11154
11155    case 'H':
11156      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
11157	{
11158	  output_operand_lossage ("invalid operand for code '%c'", code);
11159	  return;
11160	}
11161
11162      asm_fprintf (stream, "%r", REGNO (x) + 1);
11163      return;
11164
11165    case 'm':
11166      asm_fprintf (stream, "%r",
11167		   GET_CODE (XEXP (x, 0)) == REG
11168		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
11169      return;
11170
11171    case 'M':
11172      asm_fprintf (stream, "{%r-%r}",
11173		   REGNO (x),
11174		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
11175      return;
11176
11177    case 'd':
11178      /* CONST_TRUE_RTX means always -- that's the default.  */
11179      if (x == const_true_rtx)
11180	return;
11181
11182      if (!COMPARISON_P (x))
11183	{
11184	  output_operand_lossage ("invalid operand for code '%c'", code);
11185	  return;
11186	}
11187
11188      fputs (arm_condition_codes[get_arm_condition_code (x)],
11189	     stream);
11190      return;
11191
11192    case 'D':
11193      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
11194	 want to do that.  */
11195      if (x == const_true_rtx)
11196	{
11197	  output_operand_lossage ("instruction never exectued");
11198	  return;
11199	}
11200      if (!COMPARISON_P (x))
11201	{
11202	  output_operand_lossage ("invalid operand for code '%c'", code);
11203	  return;
11204	}
11205
11206      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
11207				 (get_arm_condition_code (x))],
11208	     stream);
11209      return;
11210
11211    /* Cirrus registers can be accessed in a variety of ways:
11212         single floating point (f)
11213	 double floating point (d)
11214	 32bit integer         (fx)
11215	 64bit integer         (dx).  */
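    /* Thus the same hardware register can appear in the output as, for
       example, mvf0, mvd0, mvfx0 or mvdx0, depending on the code used.  */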
11216    case 'W':			/* Cirrus register in F mode.  */
11217    case 'X':			/* Cirrus register in D mode.  */
11218    case 'Y':			/* Cirrus register in FX mode.  */
11219    case 'Z':			/* Cirrus register in DX mode.  */
11220      gcc_assert (GET_CODE (x) == REG
11221		  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
11222
11223      fprintf (stream, "mv%s%s",
11224	       code == 'W' ? "f"
11225	       : code == 'X' ? "d"
11226	       : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
11227
11228      return;
11229
11230    /* Print cirrus register in the mode specified by the register's mode.  */
11231    case 'V':
11232      {
11233	int mode = GET_MODE (x);
11234
11235	if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
11236	  {
11237	    output_operand_lossage ("invalid operand for code '%c'", code);
11238	    return;
11239	  }
11240
11241	fprintf (stream, "mv%s%s",
11242		 mode == DFmode ? "d"
11243		 : mode == SImode ? "fx"
11244		 : mode == DImode ? "dx"
11245		 : "f", reg_names[REGNO (x)] + 2);
11246
11247	return;
11248      }
11249
11250    case 'U':
11251      if (GET_CODE (x) != REG
11252	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
11253	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
11254	/* Bad value for wCG register number.  */
11255	{
11256	  output_operand_lossage ("invalid operand for code '%c'", code);
11257	  return;
11258	}
11259
11260      else
11261	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
11262      return;
11263
11264      /* Print an iWMMXt control register name.  */
11265    case 'w':
11266      if (GET_CODE (x) != CONST_INT
11267	  || INTVAL (x) < 0
11268	  || INTVAL (x) >= 16)
11269	/* Bad value for wC register number.  */
11270	{
11271	  output_operand_lossage ("invalid operand for code '%c'", code);
11272	  return;
11273	}
11274
11275      else
11276	{
11277	  static const char * wc_reg_names [16] =
11278	    {
11279	      "wCID",  "wCon",  "wCSSF", "wCASF",
11280	      "wC4",   "wC5",   "wC6",   "wC7",
11281	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
11282	      "wC12",  "wC13",  "wC14",  "wC15"
11283	    };
11284
	  fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
11286	}
11287      return;
11288
11289      /* Print a VFP double precision register name.  */
11290    case 'P':
11291      {
11292	int mode = GET_MODE (x);
11293	int num;
11294
11295	if (mode != DImode && mode != DFmode)
11296	  {
11297	    output_operand_lossage ("invalid operand for code '%c'", code);
11298	    return;
11299	  }
11300
11301	if (GET_CODE (x) != REG
11302	    || !IS_VFP_REGNUM (REGNO (x)))
11303	  {
11304	    output_operand_lossage ("invalid operand for code '%c'", code);
11305	    return;
11306	  }
11307
11308	num = REGNO(x) - FIRST_VFP_REGNUM;
11309	if (num & 1)
11310	  {
11311	    output_operand_lossage ("invalid operand for code '%c'", code);
11312	    return;
11313	  }
11314
11315	fprintf (stream, "d%d", num >> 1);
11316      }
11317      return;
11318
11319    default:
11320      if (x == 0)
11321	{
11322	  output_operand_lossage ("missing operand");
11323	  return;
11324	}
11325
11326      switch (GET_CODE (x))
11327	{
11328	case REG:
11329	  asm_fprintf (stream, "%r", REGNO (x));
11330	  break;
11331
11332	case MEM:
11333	  output_memory_reference_mode = GET_MODE (x);
11334	  output_address (XEXP (x, 0));
11335	  break;
11336
11337	case CONST_DOUBLE:
11338	  fprintf (stream, "#%s", fp_immediate_constant (x));
11339	  break;
11340
11341	default:
11342	  gcc_assert (GET_CODE (x) != NEG);
11343	  fputc ('#', stream);
11344	  output_addr_const (stream, x);
11345	  break;
11346	}
11347    }
11348}
11349
11350#ifndef AOF_ASSEMBLER
11351/* Target hook for assembling integer objects.  The ARM version needs to
11352   handle word-sized values specially.  */
11353static bool
11354arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
11355{
11356  if (size == UNITS_PER_WORD && aligned_p)
11357    {
11358      fputs ("\t.word\t", asm_out_file);
11359      output_addr_const (asm_out_file, x);
11360
11361      /* Mark symbols as position independent.  We only do this in the
11362	 .text segment, not in the .data segment.  */
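      /* For instance, a local symbol or constant-pool entry is emitted
	 as ".word sym(GOTOFF)", while a global symbol is emitted as
	 ".word sym(GOT)".  */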
11363      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
11364	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
11365	{
11366	  if (GET_CODE (x) == SYMBOL_REF
11367	      && (CONSTANT_POOL_ADDRESS_P (x)
11368		  || SYMBOL_REF_LOCAL_P (x)))
11369	    fputs ("(GOTOFF)", asm_out_file);
11370	  else if (GET_CODE (x) == LABEL_REF)
11371	    fputs ("(GOTOFF)", asm_out_file);
11372	  else
11373	    fputs ("(GOT)", asm_out_file);
11374	}
11375      fputc ('\n', asm_out_file);
11376      return true;
11377    }
11378
11379  if (arm_vector_mode_supported_p (GET_MODE (x)))
11380    {
11381      int i, units;
11382
11383      gcc_assert (GET_CODE (x) == CONST_VECTOR);
11384
11385      units = CONST_VECTOR_NUNITS (x);
11386
11387      switch (GET_MODE (x))
11388	{
11389	case V2SImode: size = 4; break;
11390	case V4HImode: size = 2; break;
11391	case V8QImode: size = 1; break;
11392	default:
11393	  gcc_unreachable ();
11394	}
11395
11396      for (i = 0; i < units; i++)
11397	{
11398	  rtx elt;
11399
11400	  elt = CONST_VECTOR_ELT (x, i);
11401	  assemble_integer
11402	    (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
11403	}
11404
11405      return true;
11406    }
11407
11408  return default_assemble_integer (x, size, aligned_p);
11409}
11410
11411
11412/* Add a function to the list of static constructors.  */
11413
11414static void
11415arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
11416{
11417  if (!TARGET_AAPCS_BASED)
11418    {
11419      default_named_section_asm_out_constructor (symbol, priority);
11420      return;
11421    }
11422
11423  /* Put these in the .init_array section, using a special relocation.  */
11424  switch_to_section (ctors_section);
11425  assemble_align (POINTER_SIZE);
11426  fputs ("\t.word\t", asm_out_file);
11427  output_addr_const (asm_out_file, symbol);
11428  fputs ("(target1)\n", asm_out_file);
11429}
11430#endif
11431
11432/* A finite state machine takes care of noticing whether or not instructions
11433   can be conditionally executed, and thus decrease execution time and code
11434   size by deleting branch instructions.  The fsm is controlled by
11435   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
11436
/* The states of the fsm controlling condition codes are:
11438   0: normal, do nothing special
11439   1: make ASM_OUTPUT_OPCODE not output this instruction
11440   2: make ASM_OUTPUT_OPCODE not output this instruction
11441   3: make instructions conditional
11442   4: make instructions conditional
11443
11444   State transitions (state->state by whom under condition):
11445   0 -> 1 final_prescan_insn if the `target' is a label
11446   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
11447   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
11448   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
11449   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
11450          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
11451   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
11452          (the target insn is arm_target_insn).
11453
11454   If the jump clobbers the conditions then we use states 2 and 4.
11455
11456   A similar thing can be done with conditional return insns.
11457
11458   XXX In case the `target' is an unconditional branch, this conditionalising
11459   of the instructions always reduces code size, but not always execution
11460   time.  But then, I want to reduce the code size to somewhere near what
11461   /bin/cc produces.  */
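
/* As an illustrative example, the fsm allows a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   to be output as

	cmp	r0, #0
	addne	r1, r1, #1

   with the branch eliminated entirely.  */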
11462
11463/* Returns the index of the ARM condition code string in
11464   `arm_condition_codes'.  COMPARISON should be an rtx like
11465   `(eq (...) (...))'.  */
11466static enum arm_cond_code
11467get_arm_condition_code (rtx comparison)
11468{
11469  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
11470  int code;
11471  enum rtx_code comp_code = GET_CODE (comparison);
11472
11473  if (GET_MODE_CLASS (mode) != MODE_CC)
11474    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
11475			   XEXP (comparison, 1));
11476
11477  switch (mode)
11478    {
11479    case CC_DNEmode: code = ARM_NE; goto dominance;
11480    case CC_DEQmode: code = ARM_EQ; goto dominance;
11481    case CC_DGEmode: code = ARM_GE; goto dominance;
11482    case CC_DGTmode: code = ARM_GT; goto dominance;
11483    case CC_DLEmode: code = ARM_LE; goto dominance;
11484    case CC_DLTmode: code = ARM_LT; goto dominance;
11485    case CC_DGEUmode: code = ARM_CS; goto dominance;
11486    case CC_DGTUmode: code = ARM_HI; goto dominance;
11487    case CC_DLEUmode: code = ARM_LS; goto dominance;
11488    case CC_DLTUmode: code = ARM_CC;
11489
11490    dominance:
11491      gcc_assert (comp_code == EQ || comp_code == NE);
11492
11493      if (comp_code == EQ)
11494	return ARM_INVERSE_CONDITION_CODE (code);
11495      return code;
11496
11497    case CC_NOOVmode:
11498      switch (comp_code)
11499	{
11500	case NE: return ARM_NE;
11501	case EQ: return ARM_EQ;
11502	case GE: return ARM_PL;
11503	case LT: return ARM_MI;
11504	default: gcc_unreachable ();
11505	}
11506
11507    case CC_Zmode:
11508      switch (comp_code)
11509	{
11510	case NE: return ARM_NE;
11511	case EQ: return ARM_EQ;
11512	default: gcc_unreachable ();
11513	}
11514
11515    case CC_Nmode:
11516      switch (comp_code)
11517	{
11518	case NE: return ARM_MI;
11519	case EQ: return ARM_PL;
11520	default: gcc_unreachable ();
11521	}
11522
11523    case CCFPEmode:
11524    case CCFPmode:
11525      /* These encodings assume that AC=1 in the FPA system control
11526	 byte.  This allows us to handle all cases except UNEQ and
11527	 LTGT.  */
11528      switch (comp_code)
11529	{
11530	case GE: return ARM_GE;
11531	case GT: return ARM_GT;
11532	case LE: return ARM_LS;
11533	case LT: return ARM_MI;
11534	case NE: return ARM_NE;
11535	case EQ: return ARM_EQ;
11536	case ORDERED: return ARM_VC;
11537	case UNORDERED: return ARM_VS;
11538	case UNLT: return ARM_LT;
11539	case UNLE: return ARM_LE;
11540	case UNGT: return ARM_HI;
11541	case UNGE: return ARM_PL;
11542	  /* UNEQ and LTGT do not have a representation.  */
11543	case UNEQ: /* Fall through.  */
11544	case LTGT: /* Fall through.  */
11545	default: gcc_unreachable ();
11546	}
11547
11548    case CC_SWPmode:
11549      switch (comp_code)
11550	{
11551	case NE: return ARM_NE;
11552	case EQ: return ARM_EQ;
11553	case GE: return ARM_LE;
11554	case GT: return ARM_LT;
11555	case LE: return ARM_GE;
11556	case LT: return ARM_GT;
11557	case GEU: return ARM_LS;
11558	case GTU: return ARM_CC;
11559	case LEU: return ARM_CS;
11560	case LTU: return ARM_HI;
11561	default: gcc_unreachable ();
11562	}
11563
11564    case CC_Cmode:
11565      switch (comp_code)
11566      {
11567      case LTU: return ARM_CS;
11568      case GEU: return ARM_CC;
11569      default: gcc_unreachable ();
11570      }
11571
11572    case CCmode:
11573      switch (comp_code)
11574	{
11575	case NE: return ARM_NE;
11576	case EQ: return ARM_EQ;
11577	case GE: return ARM_GE;
11578	case GT: return ARM_GT;
11579	case LE: return ARM_LE;
11580	case LT: return ARM_LT;
11581	case GEU: return ARM_CS;
11582	case GTU: return ARM_HI;
11583	case LEU: return ARM_LS;
11584	case LTU: return ARM_CC;
11585	default: gcc_unreachable ();
11586	}
11587
11588    default: gcc_unreachable ();
11589    }
11590}
11591
11592void
11593arm_final_prescan_insn (rtx insn)
11594{
11595  /* BODY will hold the body of INSN.  */
11596  rtx body = PATTERN (insn);
11597
11598  /* This will be 1 if trying to repeat the trick, and things need to be
11599     reversed if it appears to fail.  */
11600  int reverse = 0;
11601
  /* If JUMP_CLOBBERS is nonzero, then the conditions are clobbered if
     the branch is taken, even if the rtl suggests otherwise.  It also
     means that we have to grub around within the jump expression to find
     out what the conditions are when the jump isn't taken.  */
11606  int jump_clobbers = 0;
11607
11608  /* If we start with a return insn, we only succeed if we find another one.  */
11609  int seeking_return = 0;
11610
11611  /* START_INSN will hold the insn from where we start looking.  This is the
11612     first insn after the following code_label if REVERSE is true.  */
11613  rtx start_insn = insn;
11614
11615  /* If in state 4, check if the target branch is reached, in order to
11616     change back to state 0.  */
11617  if (arm_ccfsm_state == 4)
11618    {
11619      if (insn == arm_target_insn)
11620	{
11621	  arm_target_insn = NULL;
11622	  arm_ccfsm_state = 0;
11623	}
11624      return;
11625    }
11626
11627  /* If in state 3, it is possible to repeat the trick, if this insn is an
11628     unconditional branch to a label, and immediately following this branch
11629     is the previous target label which is only used once, and the label this
11630     branch jumps to is not too far off.  */
11631  if (arm_ccfsm_state == 3)
11632    {
11633      if (simplejump_p (insn))
11634	{
11635	  start_insn = next_nonnote_insn (start_insn);
11636	  if (GET_CODE (start_insn) == BARRIER)
11637	    {
11638	      /* XXX Isn't this always a barrier?  */
11639	      start_insn = next_nonnote_insn (start_insn);
11640	    }
11641	  if (GET_CODE (start_insn) == CODE_LABEL
11642	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11643	      && LABEL_NUSES (start_insn) == 1)
11644	    reverse = TRUE;
11645	  else
11646	    return;
11647	}
11648      else if (GET_CODE (body) == RETURN)
11649        {
11650	  start_insn = next_nonnote_insn (start_insn);
11651	  if (GET_CODE (start_insn) == BARRIER)
11652	    start_insn = next_nonnote_insn (start_insn);
11653	  if (GET_CODE (start_insn) == CODE_LABEL
11654	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11655	      && LABEL_NUSES (start_insn) == 1)
11656	    {
11657	      reverse = TRUE;
11658	      seeking_return = 1;
11659	    }
11660	  else
11661	    return;
11662        }
11663      else
11664	return;
11665    }
11666
11667  gcc_assert (!arm_ccfsm_state || reverse);
11668  if (GET_CODE (insn) != JUMP_INSN)
11669    return;
11670
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
11673  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11674    body = XVECEXP (body, 0, 0);
11675
11676  if (reverse
11677      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11678	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11679    {
11680      int insns_skipped;
11681      int fail = FALSE, succeed = FALSE;
11682      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
11683      int then_not_else = TRUE;
11684      rtx this_insn = start_insn, label = 0;
11685
11686      /* If the jump cannot be done with one instruction, we cannot
11687	 conditionally execute the instruction in the inverse case.  */
11688      if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11689	{
11690	  jump_clobbers = 1;
11691	  return;
11692	}
11693
11694      /* Register the insn jumped to.  */
11695      if (reverse)
11696        {
11697	  if (!seeking_return)
11698	    label = XEXP (SET_SRC (body), 0);
11699        }
11700      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11701	label = XEXP (XEXP (SET_SRC (body), 1), 0);
11702      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11703	{
11704	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
11705	  then_not_else = FALSE;
11706	}
11707      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11708	seeking_return = 1;
11709      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11710        {
11711	  seeking_return = 1;
11712	  then_not_else = FALSE;
11713        }
11714      else
11715	gcc_unreachable ();
11716
11717      /* See how many insns this branch skips, and what kind of insns.  If all
11718	 insns are okay, and the label or unconditional branch to the same
11719	 label is not too far away, succeed.  */
11720      for (insns_skipped = 0;
11721	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11722	{
11723	  rtx scanbody;
11724
11725	  this_insn = next_nonnote_insn (this_insn);
11726	  if (!this_insn)
11727	    break;
11728
11729	  switch (GET_CODE (this_insn))
11730	    {
11731	    case CODE_LABEL:
11732	      /* Succeed if it is the target label, otherwise fail since
11733		 control falls in from somewhere else.  */
11734	      if (this_insn == label)
11735		{
11736		  if (jump_clobbers)
11737		    {
11738		      arm_ccfsm_state = 2;
11739		      this_insn = next_nonnote_insn (this_insn);
11740		    }
11741		  else
11742		    arm_ccfsm_state = 1;
11743		  succeed = TRUE;
11744		}
11745	      else
11746		fail = TRUE;
11747	      break;
11748
11749	    case BARRIER:
11750	      /* Succeed if the following insn is the target label.
11751		 Otherwise fail.
11752		 If return insns are used then the last insn in a function
11753		 will be a barrier.  */
11754	      this_insn = next_nonnote_insn (this_insn);
11755	      if (this_insn && this_insn == label)
11756		{
11757		  if (jump_clobbers)
11758		    {
11759		      arm_ccfsm_state = 2;
11760		      this_insn = next_nonnote_insn (this_insn);
11761		    }
11762		  else
11763		    arm_ccfsm_state = 1;
11764		  succeed = TRUE;
11765		}
11766	      else
11767		fail = TRUE;
11768	      break;
11769
11770	    case CALL_INSN:
11771	      /* The AAPCS says that conditional calls should not be
11772		 used since they make interworking inefficient (the
11773		 linker can't transform BL<cond> into BLX).  That's
11774		 only a problem if the machine has BLX.  */
11775	      if (arm_arch5)
11776		{
11777		  fail = TRUE;
11778		  break;
11779		}
11780
11781	      /* Succeed if the following insn is the target label, or
11782		 if the following two insns are a barrier and the
11783		 target label.  */
11784	      this_insn = next_nonnote_insn (this_insn);
11785	      if (this_insn && GET_CODE (this_insn) == BARRIER)
11786		this_insn = next_nonnote_insn (this_insn);
11787
11788	      if (this_insn && this_insn == label
11789		  && insns_skipped < max_insns_skipped)
11790		{
11791		  if (jump_clobbers)
11792		    {
11793		      arm_ccfsm_state = 2;
11794		      this_insn = next_nonnote_insn (this_insn);
11795		    }
11796		  else
11797		    arm_ccfsm_state = 1;
11798		  succeed = TRUE;
11799		}
11800	      else
11801		fail = TRUE;
11802	      break;
11803
11804	    case JUMP_INSN:
11805      	      /* If this is an unconditional branch to the same label, succeed.
11806		 If it is to another label, do nothing.  If it is conditional,
11807		 fail.  */
11808	      /* XXX Probably, the tests for SET and the PC are
11809		 unnecessary.  */
11810
11811	      scanbody = PATTERN (this_insn);
11812	      if (GET_CODE (scanbody) == SET
11813		  && GET_CODE (SET_DEST (scanbody)) == PC)
11814		{
11815		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11816		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11817		    {
11818		      arm_ccfsm_state = 2;
11819		      succeed = TRUE;
11820		    }
11821		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11822		    fail = TRUE;
11823		}
11824	      /* Fail if a conditional return is undesirable (e.g. on a
11825		 StrongARM), but still allow this if optimizing for size.  */
11826	      else if (GET_CODE (scanbody) == RETURN
11827		       && !use_return_insn (TRUE, NULL)
11828		       && !optimize_size)
11829		fail = TRUE;
11830	      else if (GET_CODE (scanbody) == RETURN
11831		       && seeking_return)
11832	        {
11833		  arm_ccfsm_state = 2;
11834		  succeed = TRUE;
11835	        }
11836	      else if (GET_CODE (scanbody) == PARALLEL)
11837	        {
11838		  switch (get_attr_conds (this_insn))
11839		    {
11840		    case CONDS_NOCOND:
11841		      break;
11842		    default:
11843		      fail = TRUE;
11844		      break;
11845		    }
11846		}
11847	      else
11848		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
11849
11850	      break;
11851
11852	    case INSN:
11853	      /* Instructions using or affecting the condition codes make it
11854		 fail.  */
11855	      scanbody = PATTERN (this_insn);
11856	      if (!(GET_CODE (scanbody) == SET
11857		    || GET_CODE (scanbody) == PARALLEL)
11858		  || get_attr_conds (this_insn) != CONDS_NOCOND)
11859		fail = TRUE;
11860
	      /* A conditional Cirrus instruction must be followed by
		 a non-Cirrus instruction.  However, we conditionalize
		 instructions in this function, and by the time we get
		 here we can no longer add instructions (nops) because
		 shorten_branches() has already been called.  So, to be
		 safe, we disable the conditionalizing of Cirrus
		 instructions altogether.  */
11868	      if (GET_CODE (scanbody) != USE
11869		  && GET_CODE (scanbody) != CLOBBER
11870		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11871		fail = TRUE;
11872	      break;
11873
11874	    default:
11875	      break;
11876	    }
11877	}
11878      if (succeed)
11879	{
11880	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11881	    arm_target_label = CODE_LABEL_NUMBER (label);
11882	  else
11883	    {
11884	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
11885
11886	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11887	        {
11888		  this_insn = next_nonnote_insn (this_insn);
11889		  gcc_assert (!this_insn
11890			      || (GET_CODE (this_insn) != BARRIER
11891				  && GET_CODE (this_insn) != CODE_LABEL));
11892	        }
11893	      if (!this_insn)
11894	        {
		  /* Oh dear!  We ran off the end; give up.  */
11896		  recog (PATTERN (insn), insn, NULL);
11897		  arm_ccfsm_state = 0;
11898		  arm_target_insn = NULL;
11899		  return;
11900	        }
11901	      arm_target_insn = this_insn;
11902	    }
11903	  if (jump_clobbers)
11904	    {
11905	      gcc_assert (!reverse);
11906	      arm_current_cc =
11907		  get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11908							    0), 0), 1));
11909	      if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11910		arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11911	      if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11912		arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11913	    }
11914	  else
11915	    {
11916	      /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11917		 what it was.  */
11918	      if (!reverse)
11919		arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11920							       0));
11921	    }
11922
11923	  if (reverse || then_not_else)
11924	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11925	}
11926
11927      /* Restore recog_data (getting the attributes of other insns can
11928	 destroy this array, but final.c assumes that it remains intact
11929	 across this call; since the insn has been recognized already we
	 call recog directly).  */
11931      recog (PATTERN (insn), insn, NULL);
11932    }
11933}
11934
11935/* Returns true if REGNO is a valid register
11936   for holding a quantity of type MODE.  */
11937int
11938arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11939{
11940  if (GET_MODE_CLASS (mode) == MODE_CC)
11941    return (regno == CC_REGNUM
11942	    || (TARGET_HARD_FLOAT && TARGET_VFP
11943		&& regno == VFPCC_REGNUM));
11944
11945  if (TARGET_THUMB)
11946    /* For the Thumb we only allow values bigger than SImode in
11947       registers 0 - 6, so that there is always a second low
11948       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
11950       start of an even numbered register pair.  */
11951    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11952
11953  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
11954      && IS_CIRRUS_REGNUM (regno))
11955    /* We have outlawed SI values in Cirrus registers because they
11956       reside in the lower 32 bits, but SF values reside in the
11957       upper 32 bits.  This causes gcc all sorts of grief.  We can't
11958       even split the registers into pairs because Cirrus SI values
       get sign extended to 64 bits -- aldyh.  */
11960    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11961
11962  if (TARGET_HARD_FLOAT && TARGET_VFP
11963      && IS_VFP_REGNUM (regno))
11964    {
11965      if (mode == SFmode || mode == SImode)
11966	return TRUE;
11967
11968      /* DFmode values are only valid in even register pairs.  */
11969      if (mode == DFmode)
11970	return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11971      return FALSE;
11972    }
11973
11974  if (TARGET_REALLY_IWMMXT)
11975    {
11976      if (IS_IWMMXT_GR_REGNUM (regno))
11977	return mode == SImode;
11978
11979      if (IS_IWMMXT_REGNUM (regno))
11980	return VALID_IWMMXT_REG_MODE (mode);
11981    }
11982
11983  /* We allow any value to be stored in the general registers.
11984     Restrict doubleword quantities to even register pairs so that we can
11985     use ldrd.  */
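  /* For example, when TARGET_LDRD is set a DImode value may live in
     {r0, r1} or {r2, r3}, but not in {r1, r2}.  */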
11986  if (regno <= LAST_ARM_REGNUM)
11987    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11988
11989  if (regno == FRAME_POINTER_REGNUM
11990      || regno == ARG_POINTER_REGNUM)
11991    /* We only allow integers in the fake hard registers.  */
11992    return GET_MODE_CLASS (mode) == MODE_INT;
11993
11994  /* The only registers left are the FPA registers
11995     which we only allow to hold FP values.  */
11996  return (TARGET_HARD_FLOAT && TARGET_FPA
11997	  && GET_MODE_CLASS (mode) == MODE_FLOAT
11998	  && regno >= FIRST_FPA_REGNUM
11999	  && regno <= LAST_FPA_REGNUM);
12000}
12001
12002int
12003arm_regno_class (int regno)
12004{
12005  if (TARGET_THUMB)
12006    {
12007      if (regno == STACK_POINTER_REGNUM)
12008	return STACK_REG;
12009      if (regno == CC_REGNUM)
12010	return CC_REG;
12011      if (regno < 8)
12012	return LO_REGS;
12013      return HI_REGS;
12014    }
12015
12016  if (   regno <= LAST_ARM_REGNUM
12017      || regno == FRAME_POINTER_REGNUM
12018      || regno == ARG_POINTER_REGNUM)
12019    return GENERAL_REGS;
12020
12021  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
12022    return NO_REGS;
12023
12024  if (IS_CIRRUS_REGNUM (regno))
12025    return CIRRUS_REGS;
12026
12027  if (IS_VFP_REGNUM (regno))
12028    return VFP_REGS;
12029
12030  if (IS_IWMMXT_REGNUM (regno))
12031    return IWMMXT_REGS;
12032
12033  if (IS_IWMMXT_GR_REGNUM (regno))
12034    return IWMMXT_GR_REGS;
12035
12036  return FPA_REGS;
12037}
12038
12039/* Handle a special case when computing the offset
12040   of an argument from the frame pointer.  */
12041int
12042arm_debugger_arg_offset (int value, rtx addr)
12043{
12044  rtx insn;
12045
  /* We are only interested in the case where dbxout_parms() failed to
     compute the offset.  */
12047  if (value != 0)
12048    return 0;
12049
12050  /* We can only cope with the case where the address is held in a register.  */
12051  if (GET_CODE (addr) != REG)
12052    return 0;
12053
12054  /* If we are using the frame pointer to point at the argument, then
12055     an offset of 0 is correct.  */
12056  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
12057    return 0;
12058
12059  /* If we are using the stack pointer to point at the
12060     argument, then an offset of 0 is correct.  */
12061  if ((TARGET_THUMB || !frame_pointer_needed)
12062      && REGNO (addr) == SP_REGNUM)
12063    return 0;
12064
12065  /* Oh dear.  The argument is pointed to by a register rather
12066     than being held in a register, or being stored at a known
12067     offset from the frame pointer.  Since GDB only understands
12068     those two kinds of argument we must translate the address
12069     held in the register into an offset from the frame pointer.
12070     We do this by searching through the insns for the function
12071     looking to see where this register gets its value.  If the
12072     register is initialized from the frame pointer plus an offset
12073     then we are in luck and we can continue, otherwise we give up.
12074
12075     This code is exercised by producing debugging information
12076     for a function with arguments like this:
12077
12078           double func (double a, double b, int c, double d) {return d;}
12079
12080     Without this code the stab for parameter 'd' will be set to
12081     an offset of 0 from the frame pointer, rather than 8.  */
12082
12083  /* The if() statement says:
12084
12085     If the insn is a normal instruction
12086     and if the insn is setting the value in a register
12087     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
12089     that involves adding to a register
12090     which is the frame pointer
12091     a constant integer
12092
12093     then...  */
12094
12095  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12096    {
12097      if (   GET_CODE (insn) == INSN
12098	  && GET_CODE (PATTERN (insn)) == SET
12099	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
12100	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
12101	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
12102	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
12103	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
12104	     )
12105	{
12106	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
12107
12108	  break;
12109	}
12110    }
12111
12112  if (value == 0)
12113    {
12114      debug_rtx (addr);
12115      warning (0, "unable to compute real location of stacked parameter");
12116      value = 8; /* XXX magic hack */
12117    }
12118
12119  return value;
12120}
12121
12122#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
12123  do									\
12124    {									\
12125      if ((MASK) & insn_flags)						\
12126        lang_hooks.builtin_function ((NAME), (TYPE), (CODE),		\
12127				     BUILT_IN_MD, NULL, NULL_TREE);	\
12128    }									\
12129  while (0)
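
/* An illustrative (hypothetical) use of the macro above:

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx",
		   int_ftype_int, ARM_BUILTIN_GETWCX);

   would register the builtin only when the target's instruction flags
   include the iWMMXt extension.  */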
12130
12131struct builtin_description
12132{
12133  const unsigned int       mask;
12134  const enum insn_code     icode;
12135  const char * const       name;
12136  const enum arm_builtins  code;
12137  const enum rtx_code      comparison;
12138  const unsigned int       flag;
12139};
12140
12141static const struct builtin_description bdesc_2arg[] =
12142{
12143#define IWMMXT_BUILTIN(code, string, builtin) \
12144  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
12145    ARM_BUILTIN_##builtin, 0, 0 },
12146
12147  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
12148  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
12149  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
12150  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
12151  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
12152  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
12153  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)

#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3,       WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3,       WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3,       WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3,       WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3,       WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3,       WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
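
/* An illustration of how these tables are consumed (variable names
   hypothetical; the builtin and insn names are as listed): a user-level
   call such as

       long long acc = __builtin_arm_waccb (v);

   where V has mode V8QImode is matched against bdesc_1arg by
   arm_expand_builtin below and expanded through CODE_FOR_iwmmxt_waccb.  */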

/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree int_ftype_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, long_long_integer_type_node,
						 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));

  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 tree_cons (NULL_TREE,
							    V4HI_type_node,
							    endlink))));

  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
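
/* A minimal user-level sketch of two of the registrations above
   (variable names hypothetical):

       int val = __builtin_arm_getwcx (1);
       __builtin_arm_setwcx (1, val);

   Both are routed by arm_expand_builtin onto the tmrc and tmcr
   patterns respectively.  */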

static void
arm_init_tls_builtins (void)
{
  tree ftype;
  tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
  tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);

  ftype = build_function_type (ptr_type_node, void_list_node);
  lang_hooks.builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, const_nothrow);
}
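
/* A sketch of what this registration enables: user code may then write

       void *tp = __builtin_thread_pointer ();

   which arm_expand_builtin below routes to arm_load_tp.  (The variable
   name is hypothetical; the builtin name is as registered above.)  */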

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
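
/* For instance, if expand_expr yielded const0_rtx where a V8QImode
   operand was expected, the caller gets back a fresh register cleared
   via the iwmmxt_clrdi (wzero) pattern; the DImode subreg covers the
   whole 64-bit iWMMXt register.  */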

/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
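
/* Worked example, using names from the tables above: expanding
   __builtin_arm_wmaxub (a, b) reaches this routine with
   ICODE == CODE_FOR_umaxv8qi3; TMODE, MODE0 and MODE1 are all
   V8QImode, each operand is copied into a register if the operand
   predicate demands it, and a single umaxv8qi3 insn is emitted.  */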

/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree arglist, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
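
/* For example, __builtin_arm_tmovmskb (v) arrives here from the
   bdesc_1arg loop at the end of arm_expand_builtin with
   ICODE == CODE_FOR_iwmmxt_tmovmskb and DO_LOAD == 0; TMODE is then
   SImode and MODE0 is V8QImode, matching the int_ftype_v8qi signature
   registered above.  */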

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code    icode;
  tree              fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree              arglist = TREE_OPERAND (exp, 1);
  tree              arg0;
  tree              arg1;
  tree              arg2;
  rtx               op0;
  rtx               op1;
  rtx               op2;
  rtx               pat;
  int               fcode = DECL_FUNCTION_CODE (fndecl);
  size_t            i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
	       : CODE_FOR_iwmmxt_tinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_SETWCX:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));
      return 0;

    case ARM_BUILTIN_GETWCX:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));
      return target;

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);

      /* Several three-argument builtins.  */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
	       : CODE_FOR_iwmmxt_walign);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));
      return target;

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, arglist, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, arglist, target, 0);

  /* @@@ Should really do something sensible here.  */
  return NULL_RTX;
}

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  int bit;

  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;

  return bit;
}
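
/* For example, number_of_first_bit_set (0x28) returns 3, since bit 3
   is the lowest bit set in binary 101000.  Note that the loop does not
   terminate for MASK == 0; callers must pass a nonzero mask.  */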

/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to push or pop.  PUSH is
   nonzero if we should push, and zero if we should pop.  For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack.  REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers.  */
static void
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
	       unsigned long real_regs)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here; do it
	 in thumb_exit().  */
      thumb_exit (f, -1);
      return;
    }

  if (ARM_EABI_UNWIND_TABLES && push)
    {
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
	{
	  if (real_regs & (1 << regno))
	    {
	      if (real_regs & ((1 << regno) - 1))
		fprintf (f, ", ");
	      asm_fprintf (f, "%r", regno);
	    }
	}
      fprintf (f, "}\n");
    }

  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (push && (mask & (1 << LR_REGNUM)))
    {
      /* Catch pushing the LR.  */
      if (mask & 0xFF)
	fprintf (f, ", ");

      asm_fprintf (f, "%r", LR_REGNUM);

      pushed_words++;
    }
  else if (!push && (mask & (1 << PC_REGNUM)))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || current_function_calls_eh_return)
	{
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
    {
      char *l = dwarf2out_cfi_label ();
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_words = 0;
      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
	{
	  if (pushed_mask & 1)
	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
	}
    }
}
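
/* Sketch of the output for a typical prologue save (mask chosen for
   illustration): thumb_pushpop (f, 0x40f0, 1, &offset, 0x40f0), i.e.
   r4-r7 plus lr, emits

	push	{r4, r5, r6, r7, lr}

   preceded by a matching ".save {r4, r5, r6, r7, lr}" directive when
   ARM_EABI_UNWIND_TABLES is set, and advances *CFA_OFFSET by 20 when
   DWARF frame information is being emitted.  */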

/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  int mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (current_function_calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise, if we are not supporting interworking, have not created
     a backtrace structure, the function was not entered in ARM mode and
     is not returning via __builtin_eh_return, then just pop the return
     address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !current_function_calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (current_function_calls_eh_return)
    size = 12;
  else
    {
      /* Deduce the registers used from the function's return value.
	 This is more reliable than examining regs_ever_live[] because
	 that will be set if the register is ever used in the function,
	 not just if the register is used to hold a return value.  */

      if (current_function_return_rtx != 0)
	mode = GET_MODE (current_function_return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		 regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so
	 that the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;
      int  move_to;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to     = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int  popped_into;

      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
		     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (current_function_calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
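
/* In the simplest case (return address on the stack, no interworking,
   no backtrace structure, function entered in Thumb mode, and no
   __builtin_eh_return) all of the above reduces to the single
   instruction "pop {pc}".  */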


void
thumb_final_prescan_insn (rtx insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
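
/* For instance, 0x00ff0000 is shiftable (0xff << 16), while 0x00ff00ff
   is not, since no single left shift of an 8-bit value can produce it.  */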

/* Returns nonzero if the current function contains,
   or might contain, a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx insn;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (regs_ever_live [ARG_POINTER_REGNUM])
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == JUMP_INSN
	  /* Ignore tablejump patterns.  */
	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	  && get_attr_far_jump (insn) == FAR_JUMP_YES
	  )
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}

/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem of functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return TRUE;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return FALSE;
#endif
}

/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb_unexpanded_epilogue (void)
{
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;
  int size;

  if (return_used_this_function)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  live_regs_mask = thumb_compute_save_reg_mask ();
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* Deduce the registers used from the function's return value.  This
     is more reliable than examining regs_ever_live[] because that will
     be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  /* The prologue may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prologue.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
           high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
             be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8.  */

	  /* Pop the values into the low register(s).  */
	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
		       live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
			 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   current_function_pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}

/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
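
/* Consequently __builtin_return_address (0) reads the pseudo holding
   the entry value of LR, while any nonzero COUNT yields NULL; this
   backend makes no attempt to walk outer frames.  */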

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}


/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
13701
13702
13703/* Generate the rest of a function's prologue.  */
13704void
13705thumb_expand_prologue (void)
13706{
13707  rtx insn, dwarf;
13708
13709  HOST_WIDE_INT amount;
13710  arm_stack_offsets *offsets;
13711  unsigned long func_type;
13712  int regno;
13713  unsigned long live_regs_mask;
13714
13715  func_type = arm_current_func_type ();
13716
13717  /* Naked functions don't have prologues.  */
13718  if (IS_NAKED (func_type))
13719    return;
13720
13721  if (IS_INTERRUPT (func_type))
13722    {
      error ("interrupt service routines cannot be coded in Thumb mode");
13724      return;
13725    }
13726
13727  live_regs_mask = thumb_compute_save_reg_mask ();
13728  /* Load the pic register before setting the frame pointer,
13729     so we can use r7 as a temporary work register.  */
13730  if (flag_pic && arm_pic_register != INVALID_REGNUM)
13731    arm_load_pic_register (live_regs_mask);
13732
13733  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13734    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13735		    stack_pointer_rtx);
13736
13737  offsets = arm_get_frame_offsets ();
13738  amount = offsets->outgoing_args - offsets->saved_regs;
13739  if (amount)
13740    {
13741      if (amount < 512)
13742	{
13743	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13744					GEN_INT (- amount)));
13745	  RTX_FRAME_RELATED_P (insn) = 1;
13746	}
13747      else
13748	{
13749	  rtx reg;
13750
	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load it into a register.  (Also
	     the ARM debugger really likes to see only one stack decrement
	     per function.)  So instead we look for a scratch register into
	     which we can load the decrement, and then we subtract this from
	     the stack pointer.  Unfortunately on the Thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call-preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
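	  /* Illustrative only (register choices are hypothetical): for a
	     1024-byte decrement with r4 live this might emit
		ldr	r4, .LCn	@ .LCn: .word -1024
		add	sp, sp, r4
	     and, if no call-saved low register is free,
		mov	ip, r7
		ldr	r7, .LCn
		add	sp, sp, r7
		mov	r7, ip  */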
13765	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13766	    if (live_regs_mask & (1 << regno)
13767		&& !(frame_pointer_needed
13768		     && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13769	      break;
13770
13771	  if (regno > LAST_LO_REGNUM) /* Very unlikely.  */
13772	    {
13773	      rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13774
13775	      /* Choose an arbitrary, non-argument low register.  */
13776	      reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13777
13778	      /* Save it by copying it into a high, scratch register.  */
13779	      emit_insn (gen_movsi (spare, reg));
13780	      /* Add a USE to stop propagate_one_insn() from barfing.  */
13781	      emit_insn (gen_prologue_use (spare));
13782
13783	      /* Decrement the stack.  */
13784	      emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13785	      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13786					    stack_pointer_rtx, reg));
13787	      RTX_FRAME_RELATED_P (insn) = 1;
13788	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13789				   plus_constant (stack_pointer_rtx,
13790						  -amount));
13791	      RTX_FRAME_RELATED_P (dwarf) = 1;
13792	      REG_NOTES (insn)
13793		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13794				     REG_NOTES (insn));
13795
13796	      /* Restore the low register's original value.  */
13797	      emit_insn (gen_movsi (reg, spare));
13798
13799	      /* Emit a USE of the restored scratch register, so that flow
13800		 analysis will not consider the restore redundant.  The
13801		 register won't be used again in this function and isn't
13802		 restored by the epilogue.  */
13803	      emit_insn (gen_prologue_use (reg));
13804	    }
13805	  else
13806	    {
13807	      reg = gen_rtx_REG (SImode, regno);
13808
13809	      emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13810
13811	      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13812					    stack_pointer_rtx, reg));
13813	      RTX_FRAME_RELATED_P (insn) = 1;
13814	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13815				   plus_constant (stack_pointer_rtx,
13816						  -amount));
13817	      RTX_FRAME_RELATED_P (dwarf) = 1;
13818	      REG_NOTES (insn)
13819		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13820				     REG_NOTES (insn));
13821	    }
13822	}
13823    }
13824
13825  if (frame_pointer_needed)
13826    {
13827      amount = offsets->outgoing_args - offsets->locals_base;
13828
13829      if (amount < 1024)
13830	insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13831				      stack_pointer_rtx, GEN_INT (amount)));
13832      else
13833	{
13834	  emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
13835	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13836					hard_frame_pointer_rtx,
13837					stack_pointer_rtx));
13838	  dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13839			       plus_constant (stack_pointer_rtx, amount));
13840	  RTX_FRAME_RELATED_P (dwarf) = 1;
13841	  REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13842						REG_NOTES (insn));
13843	}
13844
13845      RTX_FRAME_RELATED_P (insn) = 1;
13846    }
13847
13848  /* If we are profiling, make sure no instructions are scheduled before
13849     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
13851     using the EABI unwinder, to prevent faulting instructions from being
13852     swapped with a stack adjustment.  */
13853  if (current_function_profile || !TARGET_SCHED_PROLOG
13854      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13855    emit_insn (gen_blockage ());
13856
13857  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13858  if (live_regs_mask & 0xff)
13859    cfun->machine->lr_save_eliminated = 0;
13860
13861  /* If the link register is being kept alive, with the return address in it,
13862     then make sure that it does not get reused by the ce2 pass.  */
13863  if (cfun->machine->lr_save_eliminated)
13864    emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
13865}
13866
13867
13868void
13869thumb_expand_epilogue (void)
13870{
13871  HOST_WIDE_INT amount;
13872  arm_stack_offsets *offsets;
13873  int regno;
13874
  /* Naked functions don't have epilogues.  */
13876  if (IS_NAKED (arm_current_func_type ()))
13877    return;
13878
13879  offsets = arm_get_frame_offsets ();
13880  amount = offsets->outgoing_args - offsets->saved_regs;
13881
13882  if (frame_pointer_needed)
13883    {
13884      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13885      amount = offsets->locals_base - offsets->saved_regs;
13886    }
13887
13888  gcc_assert (amount >= 0);
13889  if (amount)
13890    {
13891      if (amount < 512)
13892	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13893			       GEN_INT (amount)));
13894      else
13895	{
13896	  /* r3 is always free in the epilogue.  */
13897	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13898
13899	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
13900	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13901	}
13902    }
13903
13904  /* Emit a USE (stack_pointer_rtx), so that
13905     the stack adjustment will not be deleted.  */
13906  emit_insn (gen_prologue_use (stack_pointer_rtx));
13907
13908  if (current_function_profile || !TARGET_SCHED_PROLOG)
13909    emit_insn (gen_blockage ());
13910
13911  /* Emit a clobber for each insn that will be restored in the epilogue,
13912     so that flow2 will get register lifetimes correct.  */
13913  for (regno = 0; regno < 13; regno++)
13914    if (regs_ever_live[regno] && !call_used_regs[regno])
13915      emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13916
13917  if (! regs_ever_live[LR_REGNUM])
13918    emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13919}
13920
13921static void
13922thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13923{
13924  unsigned long live_regs_mask = 0;
13925  unsigned long l_mask;
13926  unsigned high_regs_pushed = 0;
13927  int cfa_offset = 0;
13928  int regno;
13929
13930  if (IS_NAKED (arm_current_func_type ()))
13931    return;
13932
13933  if (is_called_in_ARM_mode (current_function_decl))
13934    {
13935      const char * name;
13936
13937      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13938      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13939		  == SYMBOL_REF);
13940      name = XSTR  (XEXP (DECL_RTL (current_function_decl), 0), 0);
13941
13942      /* Generate code sequence to switch us into Thumb mode.  */
13943      /* The .code 32 directive has already been emitted by
13944	 ASM_DECLARE_FUNCTION_NAME.  */
13945      asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13946      asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
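      /* Schematically (illustrative) this emits
		orr	ip, pc, #1
		bx	ip
	 where the PC reads as the address of the orr plus 8, i.e. the
	 first Thumb instruction, and bit 0 selects Thumb state on the
	 bx.  */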
13947
13948      /* Generate a label, so that the debugger will notice the
13949	 change in instruction sets.  This label is also used by
13950	 the assembler to bypass the ARM code when this function
13951	 is called from a Thumb encoded function elsewhere in the
13952	 same file.  Hence the definition of STUB_NAME here must
13953	 agree with the definition in gas/config/tc-arm.c.  */
13954
13955#define STUB_NAME ".real_start_of"
13956
13957      fprintf (f, "\t.code\t16\n");
13958#ifdef ARM_PE
13959      if (arm_dllexport_name_p (name))
13960        name = arm_strip_name_encoding (name);
13961#endif
13962      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13963      fprintf (f, "\t.thumb_func\n");
13964      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13965    }
13966
13967  if (current_function_pretend_args_size)
13968    {
13969      /* Output unwind directive for the stack adjustment.  */
13970      if (ARM_EABI_UNWIND_TABLES)
13971	fprintf (f, "\t.pad #%d\n",
13972		 current_function_pretend_args_size);
13973
13974      if (cfun->machine->uses_anonymous_args)
13975	{
13976	  int num_pushes;
13977
13978	  fprintf (f, "\tpush\t{");
13979
13980	  num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13981
13982	  for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13983	       regno <= LAST_ARG_REGNUM;
13984	       regno++)
13985	    asm_fprintf (f, "%r%s", regno,
13986			 regno == LAST_ARG_REGNUM ? "" : ", ");
13987
13988	  fprintf (f, "}\n");
13989	}
13990      else
13991	asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13992		     SP_REGNUM, SP_REGNUM,
13993		     current_function_pretend_args_size);
13994
13995      /* We don't need to record the stores for unwinding (would it
13996	 help the debugger any if we did?), but record the change in
13997	 the stack pointer.  */
13998      if (dwarf2out_do_frame ())
13999	{
14000	  char *l = dwarf2out_cfi_label ();
14001
14002	  cfa_offset = cfa_offset + current_function_pretend_args_size;
14003	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
14004	}
14005    }
14006
14007  /* Get the registers we are going to push.  */
14008  live_regs_mask = thumb_compute_save_reg_mask ();
14009  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
14010  l_mask = live_regs_mask & 0x40ff;
14011  /* Then count how many other high registers will need to be pushed.  */
14012  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
14013
14014  if (TARGET_BACKTRACE)
14015    {
14016      unsigned offset;
14017      unsigned work_register;
14018
14019      /* We have been asked to create a stack backtrace structure.
14020         The code looks like this:
14021
14022	 0   .align 2
14023	 0   func:
14024         0     sub   SP, #16         Reserve space for 4 registers.
14025	 2     push  {R7}            Push low registers.
14026         4     add   R7, SP, #20     Get the stack pointer before the push.
14027         6     str   R7, [SP, #8]    Store the stack pointer (before reserving the space).
14028         8     mov   R7, PC          Get hold of the start of this code plus 12.
14029        10     str   R7, [SP, #16]   Store it.
14030        12     mov   R7, FP          Get hold of the current frame pointer.
14031        14     str   R7, [SP, #4]    Store it.
14032        16     mov   R7, LR          Get hold of the current return address.
14033        18     str   R7, [SP, #12]   Store it.
14034        20     add   R7, SP, #16     Point at the start of the backtrace structure.
14035        22     mov   FP, R7          Put this value into the frame pointer.  */
14036
14037      work_register = thumb_find_work_register (live_regs_mask);
14038
14039      if (ARM_EABI_UNWIND_TABLES)
14040	asm_fprintf (f, "\t.pad #16\n");
14041
14042      asm_fprintf
14043	(f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
14044	 SP_REGNUM, SP_REGNUM);
14045
14046      if (dwarf2out_do_frame ())
14047	{
14048	  char *l = dwarf2out_cfi_label ();
14049
14050	  cfa_offset = cfa_offset + 16;
14051	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
14052	}
14053
14054      if (l_mask)
14055	{
14056	  thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
14057	  offset = bit_count (l_mask) * UNITS_PER_WORD;
14058	}
14059      else
14060	offset = 0;
14061
14062      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
14063		   offset + 16 + current_function_pretend_args_size);
14064
14065      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14066		   offset + 4);
14067
14068      /* Make sure that the instruction fetching the PC is in the right place
14069	 to calculate "start of backtrace creation code + 12".  */
14070      if (l_mask)
14071	{
14072	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
14073	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14074		       offset + 12);
14075	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
14076		       ARM_HARD_FRAME_POINTER_REGNUM);
14077	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14078		       offset);
14079	}
14080      else
14081	{
14082	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
14083		       ARM_HARD_FRAME_POINTER_REGNUM);
14084	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14085		       offset);
14086	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
14087	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14088		       offset + 12);
14089	}
14090
14091      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
14092      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
14093		   offset + 8);
14094      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
14095		   offset + 12);
14096      asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
14097		   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
14098    }
14099  /* Optimization:  If we are not pushing any low registers but we are going
14100     to push some high registers then delay our first push.  This will just
14101     be a push of LR and we can combine it with the push of the first high
14102     register.  */
14103  else if ((l_mask & 0xff) != 0
14104	   || (high_regs_pushed == 0 && l_mask))
14105    thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
14106
14107  if (high_regs_pushed)
14108    {
14109      unsigned pushable_regs;
14110      unsigned next_hi_reg;
14111
14112      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
14113	if (live_regs_mask & (1 << next_hi_reg))
14114	  break;
14115
14116      pushable_regs = l_mask & 0xff;
14117
14118      if (pushable_regs == 0)
14119	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
14120
14121      while (high_regs_pushed > 0)
14122	{
14123	  unsigned long real_regs_mask = 0;
14124
14125	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
14126	    {
14127	      if (pushable_regs & (1 << regno))
14128		{
14129		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
14130
14131		  high_regs_pushed --;
14132		  real_regs_mask |= (1 << next_hi_reg);
14133
14134		  if (high_regs_pushed)
14135		    {
14136		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
14137			   next_hi_reg --)
14138			if (live_regs_mask & (1 << next_hi_reg))
14139			  break;
14140		    }
14141		  else
14142		    {
14143		      pushable_regs &= ~((1 << regno) - 1);
14144		      break;
14145		    }
14146		}
14147	    }
14148
14149	  /* If we had to find a work register and we have not yet
14150	     saved the LR then add it to the list of regs to push.  */
14151	  if (l_mask == (1 << LR_REGNUM))
14152	    {
14153	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
14154			     1, &cfa_offset,
14155			     real_regs_mask | (1 << LR_REGNUM));
14156	      l_mask = 0;
14157	    }
14158	  else
14159	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
14160	}
14161    }
14162}
14163
14164/* Handle the case of a double word load into a low register from
14165   a computed memory address.  The computed address may involve a
14166   register which is overwritten by the load.  */
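/* For instance (illustrative), loading the pair r0/r1 from an address
   held in r0 must emit
	ldr	r1, [r0, #4]
	ldr	r0, [r0]
   with the high word first, so that the base register is not
   clobbered before the second load.  */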
14167const char *
14168thumb_load_double_from_address (rtx *operands)
14169{
14170  rtx addr;
14171  rtx base;
14172  rtx offset;
14173  rtx arg1;
14174  rtx arg2;
14175
14176  gcc_assert (GET_CODE (operands[0]) == REG);
14177  gcc_assert (GET_CODE (operands[1]) == MEM);
14178
14179  /* Get the memory address.  */
14180  addr = XEXP (operands[1], 0);
14181
14182  /* Work out how the memory address is computed.  */
14183  switch (GET_CODE (addr))
14184    {
14185    case REG:
14186      operands[2] = adjust_address (operands[1], SImode, 4);
14187
14188      if (REGNO (operands[0]) == REGNO (addr))
14189	{
14190	  output_asm_insn ("ldr\t%H0, %2", operands);
14191	  output_asm_insn ("ldr\t%0, %1", operands);
14192	}
14193      else
14194	{
14195	  output_asm_insn ("ldr\t%0, %1", operands);
14196	  output_asm_insn ("ldr\t%H0, %2", operands);
14197	}
14198      break;
14199
14200    case CONST:
14201      /* Compute <address> + 4 for the high order load.  */
14202      operands[2] = adjust_address (operands[1], SImode, 4);
14203
14204      output_asm_insn ("ldr\t%0, %1", operands);
14205      output_asm_insn ("ldr\t%H0, %2", operands);
14206      break;
14207
14208    case PLUS:
14209      arg1   = XEXP (addr, 0);
14210      arg2   = XEXP (addr, 1);
14211
14212      if (CONSTANT_P (arg1))
14213	base = arg2, offset = arg1;
14214      else
14215	base = arg1, offset = arg2;
14216
14217      gcc_assert (GET_CODE (base) == REG);
14218
      /* Catch the case of <address> = <reg> + <reg>.  */
14220      if (GET_CODE (offset) == REG)
14221	{
14222	  int reg_offset = REGNO (offset);
14223	  int reg_base   = REGNO (base);
14224	  int reg_dest   = REGNO (operands[0]);
14225
14226	  /* Add the base and offset registers together into the
14227             higher destination register.  */
14228	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
14229		       reg_dest + 1, reg_base, reg_offset);
14230
14231	  /* Load the lower destination register from the address in
14232             the higher destination register.  */
14233	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
14234		       reg_dest, reg_dest + 1);
14235
14236	  /* Load the higher destination register from its own address
14237             plus 4.  */
14238	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
14239		       reg_dest + 1, reg_dest + 1);
14240	}
14241      else
14242	{
14243	  /* Compute <address> + 4 for the high order load.  */
14244	  operands[2] = adjust_address (operands[1], SImode, 4);
14245
14246	  /* If the computed address is held in the low order register
14247	     then load the high order register first, otherwise always
14248	     load the low order register first.  */
14249	  if (REGNO (operands[0]) == REGNO (base))
14250	    {
14251	      output_asm_insn ("ldr\t%H0, %2", operands);
14252	      output_asm_insn ("ldr\t%0, %1", operands);
14253	    }
14254	  else
14255	    {
14256	      output_asm_insn ("ldr\t%0, %1", operands);
14257	      output_asm_insn ("ldr\t%H0, %2", operands);
14258	    }
14259	}
14260      break;
14261
14262    case LABEL_REF:
14263      /* With no registers to worry about we can just load the value
14264         directly.  */
14265      operands[2] = adjust_address (operands[1], SImode, 4);
14266
14267      output_asm_insn ("ldr\t%H0, %2", operands);
14268      output_asm_insn ("ldr\t%0, %1", operands);
14269      break;
14270
14271    default:
14272      gcc_unreachable ();
14273    }
14274
14275  return "";
14276}
14277
14278const char *
14279thumb_output_move_mem_multiple (int n, rtx *operands)
14280{
14281  rtx tmp;
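  /* The register lists of ldmia/stmia must name registers in
     ascending order, so first sort the scratch registers with a
     small compare-and-swap network (illustratively, {r5, r3, r4}
     becomes {r3, r4, r5}).  */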
14282
14283  switch (n)
14284    {
14285    case 2:
14286      if (REGNO (operands[4]) > REGNO (operands[5]))
14287	{
14288	  tmp = operands[4];
14289	  operands[4] = operands[5];
14290	  operands[5] = tmp;
14291	}
14292      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
14293      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
14294      break;
14295
14296    case 3:
14297      if (REGNO (operands[4]) > REGNO (operands[5]))
14298	{
14299	  tmp = operands[4];
14300	  operands[4] = operands[5];
14301	  operands[5] = tmp;
14302	}
14303      if (REGNO (operands[5]) > REGNO (operands[6]))
14304	{
14305	  tmp = operands[5];
14306	  operands[5] = operands[6];
14307	  operands[6] = tmp;
14308	}
14309      if (REGNO (operands[4]) > REGNO (operands[5]))
14310	{
14311	  tmp = operands[4];
14312	  operands[4] = operands[5];
14313	  operands[5] = tmp;
14314	}
14315
14316      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
14317      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
14318      break;
14319
14320    default:
14321      gcc_unreachable ();
14322    }
14323
14324  return "";
14325}
14326
14327/* Output a call-via instruction for thumb state.  */
14328const char *
14329thumb_call_via_reg (rtx reg)
14330{
14331  int regno = REGNO (reg);
14332  rtx *labelp;
14333
14334  gcc_assert (regno < LR_REGNUM);
14335
14336  /* If we are in the normal text section we can use a single instance
14337     per compilation unit.  If we are doing function sections, then we need
14338     an entry per section, since we can't rely on reachability.  */
14339  if (in_section == text_section)
14340    {
14341      thumb_call_reg_needed = 1;
14342
14343      if (thumb_call_via_label[regno] == NULL)
14344	thumb_call_via_label[regno] = gen_label_rtx ();
14345      labelp = thumb_call_via_label + regno;
14346    }
14347  else
14348    {
14349      if (cfun->machine->call_via[regno] == NULL)
14350	cfun->machine->call_via[regno] = gen_label_rtx ();
14351      labelp = cfun->machine->call_via + regno;
14352    }
14353
14354  output_asm_insn ("bl\t%a0", labelp);
14355  return "";
14356}
14357
14358/* Routines for generating rtl.  */
14359void
14360thumb_expand_movmemqi (rtx *operands)
14361{
14362  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
14363  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
14364  HOST_WIDE_INT len = INTVAL (operands[2]);
14365  HOST_WIDE_INT offset = 0;
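  /* Greedily use the load/store-multiple patterns for 12- and 8-byte
     blocks, then finish with word, halfword and byte moves; e.g.
     (illustrative) a 23-byte copy becomes 12 + 8 + 2 + 1.  */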
14366
14367  while (len >= 12)
14368    {
14369      emit_insn (gen_movmem12b (out, in, out, in));
14370      len -= 12;
14371    }
14372
14373  if (len >= 8)
14374    {
14375      emit_insn (gen_movmem8b (out, in, out, in));
14376      len -= 8;
14377    }
14378
14379  if (len >= 4)
14380    {
14381      rtx reg = gen_reg_rtx (SImode);
14382      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
14383      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
14384      len -= 4;
14385      offset += 4;
14386    }
14387
14388  if (len >= 2)
14389    {
14390      rtx reg = gen_reg_rtx (HImode);
14391      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
14392					      plus_constant (in, offset))));
14393      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
14394			    reg));
14395      len -= 2;
14396      offset += 2;
14397    }
14398
14399  if (len)
14400    {
14401      rtx reg = gen_reg_rtx (QImode);
14402      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
14403					      plus_constant (in, offset))));
14404      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
14405			    reg));
14406    }
14407}
14408
14409void
14410thumb_reload_out_hi (rtx *operands)
14411{
14412  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
14413}
14414
14415/* Handle reading a half-word from memory during reload.  */
14416void
14417thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
14418{
14419  gcc_unreachable ();
14420}
14421
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
14424static int
14425arm_get_strip_length (int c)
14426{
14427  switch (c)
14428    {
14429    ARM_NAME_ENCODING_LENGTHS
14430      default: return 0;
14431    }
14432}
14433
14434/* Return a pointer to a function's name with any
14435   and all prefix encodings stripped from it.  */
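/* E.g. (illustrative) a name encoded as "*foo" is returned as "foo",
   each leading encoding character being skipped in turn.  */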
14436const char *
14437arm_strip_name_encoding (const char *name)
14438{
14439  int skip;
14440
14441  while ((skip = arm_get_strip_length (* name)))
14442    name += skip;
14443
14444  return name;
14445}
14446
14447/* If there is a '*' anywhere in the name's prefix, then
14448   emit the stripped name verbatim, otherwise prepend an
14449   underscore if leading underscores are being used.  */
14450void
14451arm_asm_output_labelref (FILE *stream, const char *name)
14452{
14453  int skip;
14454  int verbatim = 0;
14455
14456  while ((skip = arm_get_strip_length (* name)))
14457    {
14458      verbatim |= (*name == '*');
14459      name += skip;
14460    }
14461
14462  if (verbatim)
14463    fputs (name, stream);
14464  else
14465    asm_fprintf (stream, "%U%s", name);
14466}
14467
14468static void
14469arm_file_start (void)
14470{
14471  int val;
14472
14473  if (TARGET_BPABI)
14474    {
14475      const char *fpu_name;
14476      if (arm_select[0].string)
14477	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
14478      else if (arm_select[1].string)
14479	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
14480      else
14481	asm_fprintf (asm_out_file, "\t.cpu %s\n",
14482		     all_cores[arm_default_cpu].name);
14483
14484      if (TARGET_SOFT_FLOAT)
14485	{
14486	  if (TARGET_VFP)
14487	    fpu_name = "softvfp";
14488	  else
14489	    fpu_name = "softfpa";
14490	}
14491      else
14492	{
14493	  switch (arm_fpu_arch)
14494	    {
14495	    case FPUTYPE_FPA:
14496	      fpu_name = "fpa";
14497	      break;
14498	    case FPUTYPE_FPA_EMU2:
14499	      fpu_name = "fpe2";
14500	      break;
14501	    case FPUTYPE_FPA_EMU3:
14502	      fpu_name = "fpe3";
14503	      break;
14504	    case FPUTYPE_MAVERICK:
14505	      fpu_name = "maverick";
14506	      break;
14507	    case FPUTYPE_VFP:
14508	      if (TARGET_HARD_FLOAT)
14509		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
14510	      if (TARGET_HARD_FLOAT_ABI)
14511		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
14512	      fpu_name = "vfp";
14513	      break;
14514	    default:
	      abort ();
14516	    }
14517	}
14518      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
14519
14520      /* Some of these attributes only apply when the corresponding features
         are used.  However, we don't have any easy way of figuring this out.
14522	 Conservatively record the setting that would have been used.  */
14523
14524      /* Tag_ABI_PCS_wchar_t.  */
14525      asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
14526		   (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
14527
14528      /* Tag_ABI_FP_rounding.  */
14529      if (flag_rounding_math)
14530	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
14531      if (!flag_unsafe_math_optimizations)
14532	{
	  /* Tag_ABI_FP_denormal.  */
14534	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
14535	  /* Tag_ABI_FP_exceptions.  */
14536	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
14537	}
14538      /* Tag_ABI_FP_user_exceptions.  */
14539      if (flag_signaling_nans)
14540	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
14541      /* Tag_ABI_FP_number_model.  */
14542      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
14543		   flag_finite_math_only ? 1 : 3);
14544
14545      /* Tag_ABI_align8_needed.  */
14546      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
14547      /* Tag_ABI_align8_preserved.  */
14548      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
14549      /* Tag_ABI_enum_size.  */
14550      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
14551		   flag_short_enums ? 1 : 2);
14552
14553      /* Tag_ABI_optimization_goals.  */
14554      if (optimize_size)
14555	val = 4;
14556      else if (optimize >= 2)
14557	val = 2;
14558      else if (optimize)
14559	val = 1;
14560      else
14561	val = 6;
14562      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
14563    }
  default_file_start ();
14565}
14566
14567static void
14568arm_file_end (void)
14569{
14570  int regno;
14571
14572  if (! thumb_call_reg_needed)
14573    return;
14574
14575  switch_to_section (text_section);
14576  asm_fprintf (asm_out_file, "\t.code 16\n");
14577  ASM_OUTPUT_ALIGN (asm_out_file, 1);
14578
14579  for (regno = 0; regno < LR_REGNUM; regno++)
14580    {
14581      rtx label = thumb_call_via_label[regno];
14582
14583      if (label != 0)
14584	{
14585	  targetm.asm_out.internal_label (asm_out_file, "L",
14586					  CODE_LABEL_NUMBER (label));
14587	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14588	}
14589    }
14590}
14591
14592rtx aof_pic_label;
14593
14594#ifdef AOF_ASSEMBLER
14595/* Special functions only needed when producing AOF syntax assembler.  */
14596
14597struct pic_chain
14598{
14599  struct pic_chain * next;
14600  const char * symname;
14601};
14602
14603static struct pic_chain * aof_pic_chain = NULL;
14604
14605rtx
14606aof_pic_entry (rtx x)
14607{
14608  struct pic_chain ** chainp;
14609  int offset;
14610
14611  if (aof_pic_label == NULL_RTX)
14612    {
14613      aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
14614    }
14615
14616  for (offset = 0, chainp = &aof_pic_chain; *chainp;
14617       offset += 4, chainp = &(*chainp)->next)
14618    if ((*chainp)->symname == XSTR (x, 0))
14619      return plus_constant (aof_pic_label, offset);
14620
14621  *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
14622  (*chainp)->next = NULL;
14623  (*chainp)->symname = XSTR (x, 0);
14624  return plus_constant (aof_pic_label, offset);
14625}
14626
14627void
14628aof_dump_pic_table (FILE *f)
14629{
14630  struct pic_chain * chain;
14631
14632  if (aof_pic_chain == NULL)
14633    return;
14634
14635  asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
14636	       PIC_OFFSET_TABLE_REGNUM,
14637	       PIC_OFFSET_TABLE_REGNUM);
14638  fputs ("|x$adcons|\n", f);
14639
14640  for (chain = aof_pic_chain; chain; chain = chain->next)
14641    {
14642      fputs ("\tDCD\t", f);
14643      assemble_name (f, chain->symname);
14644      fputs ("\n", f);
14645    }
14646}
14647
14648int arm_text_section_count = 1;
14649
14650/* A get_unnamed_section callback for switching to the text section.  */
14651
14652static void
14653aof_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
14654{
14655  fprintf (asm_out_file, "\tAREA |C$$code%d|, CODE, READONLY",
14656	   arm_text_section_count++);
14657  if (flag_pic)
14658    fprintf (asm_out_file, ", PIC, REENTRANT");
14659  fprintf (asm_out_file, "\n");
14660}
14661
14662static int arm_data_section_count = 1;
14663
14664/* A get_unnamed_section callback for switching to the data section.  */
14665
14666static void
14667aof_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
14668{
14669  fprintf (asm_out_file, "\tAREA |C$$data%d|, DATA\n",
14670	   arm_data_section_count++);
14671}
14672
14673/* Implement TARGET_ASM_INIT_SECTIONS.
14674
14675   AOF Assembler syntax is a nightmare when it comes to areas, since once
14676   we change from one area to another, we can't go back again.  Instead,
14677   we must create a new area with the same attributes and add the new output
14678   to that.  Unfortunately, there is nothing we can do here to guarantee that
14679   two areas with the same attributes will be linked adjacently in the
14680   resulting executable, so we have to be careful not to do pc-relative
14681   addressing across such boundaries.  */
14682
14683static void
14684aof_asm_init_sections (void)
14685{
14686  text_section = get_unnamed_section (SECTION_CODE,
14687				      aof_output_text_section_asm_op, NULL);
14688  data_section = get_unnamed_section (SECTION_WRITE,
14689				      aof_output_data_section_asm_op, NULL);
14690  readonly_data_section = text_section;
14691}
14692
14693void
14694zero_init_section (void)
14695{
14696  static int zero_init_count = 1;
14697
14698  fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n", zero_init_count++);
14699  in_section = NULL;
14700}
14701
14702/* The AOF assembler is religiously strict about declarations of
14703   imported and exported symbols, so that it is impossible to declare
14704   a function as imported near the beginning of the file, and then to
14705   export it later on.  It is, however, possible to delay the decision
14706   until all the functions in the file have been compiled.  To get
14707   around this, we maintain a list of the imports and exports, and
14708   delete from it any that are subsequently defined.  At the end of
14709   compilation we spit the remainder of the list out before the END
14710   directive.  */
14711
14712struct import
14713{
14714  struct import * next;
14715  const char * name;
14716};
14717
14718static struct import * imports_list = NULL;
14719
14720void
14721aof_add_import (const char *name)
14722{
14723  struct import * new;
14724
14725  for (new = imports_list; new; new = new->next)
14726    if (new->name == name)
14727      return;
14728
14729  new = (struct import *) xmalloc (sizeof (struct import));
14730  new->next = imports_list;
14731  imports_list = new;
14732  new->name = name;
14733}
14734
14735void
14736aof_delete_import (const char *name)
14737{
14738  struct import ** old;
14739
14740  for (old = &imports_list; *old; old = & (*old)->next)
14741    {
14742      if ((*old)->name == name)
14743	{
14744	  *old = (*old)->next;
14745	  return;
14746	}
14747    }
14748}
14749
14750int arm_main_function = 0;
14751
14752static void
14753aof_dump_imports (FILE *f)
14754{
14755  /* The AOF assembler needs this to cause the startup code to be extracted
     from the library.  Bringing in __main causes the whole thing to work
14757     automagically.  */
14758  if (arm_main_function)
14759    {
14760      switch_to_section (text_section);
14761      fputs ("\tIMPORT __main\n", f);
14762      fputs ("\tDCD __main\n", f);
14763    }
14764
14765  /* Now dump the remaining imports.  */
14766  while (imports_list)
14767    {
14768      fprintf (f, "\tIMPORT\t");
14769      assemble_name (f, imports_list->name);
14770      fputc ('\n', f);
14771      imports_list = imports_list->next;
14772    }
14773}
14774
14775static void
14776aof_globalize_label (FILE *stream, const char *name)
14777{
14778  default_globalize_label (stream, name);
14779  if (! strcmp (name, "main"))
14780    arm_main_function = 1;
14781}
14782
14783static void
14784aof_file_start (void)
14785{
14786  fputs ("__r0\tRN\t0\n", asm_out_file);
14787  fputs ("__a1\tRN\t0\n", asm_out_file);
14788  fputs ("__a2\tRN\t1\n", asm_out_file);
14789  fputs ("__a3\tRN\t2\n", asm_out_file);
14790  fputs ("__a4\tRN\t3\n", asm_out_file);
14791  fputs ("__v1\tRN\t4\n", asm_out_file);
14792  fputs ("__v2\tRN\t5\n", asm_out_file);
14793  fputs ("__v3\tRN\t6\n", asm_out_file);
14794  fputs ("__v4\tRN\t7\n", asm_out_file);
14795  fputs ("__v5\tRN\t8\n", asm_out_file);
14796  fputs ("__v6\tRN\t9\n", asm_out_file);
14797  fputs ("__sl\tRN\t10\n", asm_out_file);
14798  fputs ("__fp\tRN\t11\n", asm_out_file);
14799  fputs ("__ip\tRN\t12\n", asm_out_file);
14800  fputs ("__sp\tRN\t13\n", asm_out_file);
14801  fputs ("__lr\tRN\t14\n", asm_out_file);
14802  fputs ("__pc\tRN\t15\n", asm_out_file);
14803  fputs ("__f0\tFN\t0\n", asm_out_file);
14804  fputs ("__f1\tFN\t1\n", asm_out_file);
14805  fputs ("__f2\tFN\t2\n", asm_out_file);
14806  fputs ("__f3\tFN\t3\n", asm_out_file);
14807  fputs ("__f4\tFN\t4\n", asm_out_file);
14808  fputs ("__f5\tFN\t5\n", asm_out_file);
14809  fputs ("__f6\tFN\t6\n", asm_out_file);
14810  fputs ("__f7\tFN\t7\n", asm_out_file);
14811  switch_to_section (text_section);
14812}
14813
14814static void
14815aof_file_end (void)
14816{
14817  if (flag_pic)
14818    aof_dump_pic_table (asm_out_file);
14819  arm_file_end ();
14820  aof_dump_imports (asm_out_file);
14821  fputs ("\tEND\n", asm_out_file);
14822}
14823#endif /* AOF_ASSEMBLER */
14824
14825#ifndef ARM_PE
14826/* Symbols in the text segment can be accessed without indirecting via the
14827   constant pool; it may take an extra binary operation, but this is still
14828   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
14830   simplification.  */
14831
14832static void
14833arm_encode_section_info (tree decl, rtx rtl, int first)
14834{
14835  /* This doesn't work with AOF syntax, since the string table may be in
14836     a different AREA.  */
14837#ifndef AOF_ASSEMBLER
14838  if (optimize > 0 && TREE_CONSTANT (decl))
14839    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14840#endif
14841
14842  /* If we are referencing a function that is weak then encode a long call
     flag in the function name, otherwise if the function is static or
     known to be defined in this file then encode a short call flag.  */
14845  if (first && DECL_P (decl))
14846    {
14847      if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14848        arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14849      else if (! TREE_PUBLIC (decl))
14850        arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14851    }
14852
14853  default_encode_section_info (decl, rtl, first);
14854}
14855#endif /* !ARM_PE */
14856
14857static void
14858arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14859{
14860  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14861      && !strcmp (prefix, "L"))
14862    {
14863      arm_ccfsm_state = 0;
14864      arm_target_insn = NULL;
14865    }
14866  default_internal_label (stream, prefix, labelno);
14867}
14868
14869/* Output code to add DELTA to the first argument, and then jump
14870   to FUNCTION.  Used for C++ multiple inheritance.  */
14871static void
14872arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14873		     HOST_WIDE_INT delta,
14874		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14875		     tree function)
14876{
14877  static int thunk_label = 0;
14878  char label[256];
14879  char labelpc[256];
14880  int mi_delta = delta;
14881  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14882  int shift = 0;
14883  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14884                    ? 1 : 0);
14885  if (mi_delta < 0)
14886    mi_delta = - mi_delta;
14887  if (TARGET_THUMB)
14888    {
14889      int labelno = thunk_label++;
14890      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14891      fputs ("\tldr\tr12, ", file);
14892      assemble_name (file, label);
14893      fputc ('\n', file);
14894      if (flag_pic)
14895	{
14896	  /* If we are generating PIC, the ldr instruction below loads
14897	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
14898	     the address of the add + 8, so we have:
14899
14900	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
14901	         = target + 1.
14902
14903	     Note that we have "+ 1" because some versions of GNU ld
14904	     don't set the low bit of the result for R_ARM_REL32
14905	     relocations against thumb function symbols.  */
14906	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
14907	  assemble_name (file, labelpc);
14908	  fputs (":\n", file);
14909	  fputs ("\tadd\tr12, pc, r12\n", file);
14910	}
14911    }
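  /* Add DELTA to the this pointer using a series of add/sub
     instructions whose immediates are 8-bit values at even bit
     positions; e.g. (illustrative, assuming the this pointer is in
     r0) a delta of 0x1234 becomes
	add	r0, r0, #0x234
	add	r0, r0, #0x1000  */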
14912  while (mi_delta != 0)
14913    {
14914      if ((mi_delta & (3 << shift)) == 0)
14915        shift += 2;
14916      else
14917        {
14918          asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14919                       mi_op, this_regno, this_regno,
14920                       mi_delta & (0xff << shift));
14921          mi_delta &= ~(0xff << shift);
14922          shift += 8;
14923        }
14924    }
14925  if (TARGET_THUMB)
14926    {
14927      fprintf (file, "\tbx\tr12\n");
14928      ASM_OUTPUT_ALIGN (file, 2);
14929      assemble_name (file, label);
14930      fputs (":\n", file);
14931      if (flag_pic)
14932	{
14933	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
14934	  rtx tem = XEXP (DECL_RTL (function), 0);
14935	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
14936	  tem = gen_rtx_MINUS (GET_MODE (tem),
14937			       tem,
14938			       gen_rtx_SYMBOL_REF (Pmode,
14939						   ggc_strdup (labelpc)));
14940	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
14941	}
14942      else
14943	/* Output ".word .LTHUNKn".  */
14944	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14945    }
14946  else
14947    {
14948      fputs ("\tb\t", file);
14949      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14950      if (NEED_PLT_RELOC)
14951        fputs ("(PLT)", file);
14952      fputc ('\n', file);
14953    }
14954}
14955
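/* Output the hexadecimal representation of the constant vector X to
   FILE, most significant element first; e.g. (illustrative) the
   V4HImode vector {1, 2, 3, 4} is printed as 0x0004000300020001.  */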
14956int
14957arm_emit_vector_const (FILE *file, rtx x)
14958{
14959  int i;
14960  const char * pattern;
14961
14962  gcc_assert (GET_CODE (x) == CONST_VECTOR);
14963
14964  switch (GET_MODE (x))
14965    {
14966    case V2SImode: pattern = "%08x"; break;
14967    case V4HImode: pattern = "%04x"; break;
14968    case V8QImode: pattern = "%02x"; break;
14969    default:       gcc_unreachable ();
14970    }
14971
14972  fprintf (file, "0x");
14973  for (i = CONST_VECTOR_NUNITS (x); i--;)
14974    {
14975      rtx element;
14976
14977      element = CONST_VECTOR_ELT (x, i);
14978      fprintf (file, pattern, INTVAL (element));
14979    }
14980
14981  return 1;
14982}
14983
14984const char *
14985arm_output_load_gr (rtx *operands)
14986{
14987  rtx reg;
14988  rtx offset;
14989  rtx wcgr;
14990  rtx sum;
14991
14992  if (GET_CODE (operands [1]) != MEM
14993      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14994      || GET_CODE (reg = XEXP (sum, 0)) != REG
14995      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14996      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14997    return "wldrw%?\t%0, %1";
14998
14999  /* Fix up an out-of-range load of a GR register.  */
15000  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
15001  wcgr = operands[0];
15002  operands[0] = reg;
15003  output_asm_insn ("ldr%?\t%0, %1", operands);
15004
15005  operands[0] = wcgr;
15006  operands[1] = reg;
15007  output_asm_insn ("tmcr%?\t%0, %1", operands);
15008  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
15009
15010  return "";
15011}
15012
15013/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
15014
15015   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
15016   named arg and all anonymous args onto the stack.
15017   XXX I know the prologue shouldn't be pushing registers, but it is faster
15018   that way.  */
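/* For example (illustrative), for "int f (int a, ...)" the named
   argument consumes r0, leaving cum->nregs == 1, so the prologue is
   asked to push r1-r3 and *pretend_size becomes 12.  */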
15019
15020static void
15021arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
15022			    enum machine_mode mode ATTRIBUTE_UNUSED,
15023			    tree type ATTRIBUTE_UNUSED,
15024			    int *pretend_size,
15025			    int second_time ATTRIBUTE_UNUSED)
15026{
15027  cfun->machine->uses_anonymous_args = 1;
15028  if (cum->nregs < NUM_ARG_REGS)
15029    *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
15030}
15031
15032/* Return nonzero if the CONSUMER instruction (a store) does not need
15033   PRODUCER's value to calculate the address.  */
15034
15035int
15036arm_no_early_store_addr_dep (rtx producer, rtx consumer)
15037{
15038  rtx value = PATTERN (producer);
15039  rtx addr = PATTERN (consumer);
15040
15041  if (GET_CODE (value) == COND_EXEC)
15042    value = COND_EXEC_CODE (value);
15043  if (GET_CODE (value) == PARALLEL)
15044    value = XVECEXP (value, 0, 0);
15045  value = XEXP (value, 0);
15046  if (GET_CODE (addr) == COND_EXEC)
15047    addr = COND_EXEC_CODE (addr);
15048  if (GET_CODE (addr) == PARALLEL)
15049    addr = XVECEXP (addr, 0, 0);
15050  addr = XEXP (addr, 0);
15051
15052  return !reg_overlap_mentioned_p (value, addr);
15053}
15054
15055/* Return nonzero if the CONSUMER instruction (an ALU op) does not
15056   have an early register shift value or amount dependency on the
15057   result of PRODUCER.  */
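/* A consumer of the form (illustrative)
     (set (reg:SI d) (plus:SI (ashift:SI (reg:SI m) (reg:SI s))
			      (reg:SI a)))
   needs both M and S early; we therefore test PRODUCER's destination
   against the whole shift expression.  */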
15058
15059int
15060arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
15061{
15062  rtx value = PATTERN (producer);
15063  rtx op = PATTERN (consumer);
15064  rtx early_op;
15065
15066  if (GET_CODE (value) == COND_EXEC)
15067    value = COND_EXEC_CODE (value);
15068  if (GET_CODE (value) == PARALLEL)
15069    value = XVECEXP (value, 0, 0);
15070  value = XEXP (value, 0);
15071  if (GET_CODE (op) == COND_EXEC)
15072    op = COND_EXEC_CODE (op);
15073  if (GET_CODE (op) == PARALLEL)
15074    op = XVECEXP (op, 0, 0);
15075  op = XEXP (op, 1);
15076
15077  early_op = XEXP (op, 0);
15078  /* This is either an actual independent shift, or a shift applied to
15079     the first operand of another operation.  We want the whole shift
15080     operation.  */
15081  if (GET_CODE (early_op) == REG)
15082    early_op = op;
15083
15084  return !reg_overlap_mentioned_p (value, early_op);
15085}
15086
15087/* Return nonzero if the CONSUMER instruction (an ALU op) does not
15088   have an early register shift value dependency on the result of
15089   PRODUCER.  */
15090
15091int
15092arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
15093{
15094  rtx value = PATTERN (producer);
15095  rtx op = PATTERN (consumer);
15096  rtx early_op;
15097
15098  if (GET_CODE (value) == COND_EXEC)
15099    value = COND_EXEC_CODE (value);
15100  if (GET_CODE (value) == PARALLEL)
15101    value = XVECEXP (value, 0, 0);
15102  value = XEXP (value, 0);
15103  if (GET_CODE (op) == COND_EXEC)
15104    op = COND_EXEC_CODE (op);
15105  if (GET_CODE (op) == PARALLEL)
15106    op = XVECEXP (op, 0, 0);
15107  op = XEXP (op, 1);
15108
15109  early_op = XEXP (op, 0);
15110
15111  /* This is either an actual independent shift, or a shift applied to
15112     the first operand of another operation.  We want the value being
15113     shifted, in either case.  */
15114  if (GET_CODE (early_op) != REG)
15115    early_op = XEXP (early_op, 0);
15116
15117  return !reg_overlap_mentioned_p (value, early_op);
15118}
15119
15120/* Return nonzero if the CONSUMER (a mul or mac op) does not
15121   have an early register mult dependency on the result of
15122   PRODUCER.  */
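/* For a multiply-accumulate of the form (illustrative)
     (set (reg:SI d) (plus:SI (mult:SI (reg:SI a) (reg:SI b))
			      (reg:SI acc)))
   only the multiply operands A and B are needed early; the
   accumulator is deliberately not tested.  */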
15123
15124int
15125arm_no_early_mul_dep (rtx producer, rtx consumer)
15126{
15127  rtx value = PATTERN (producer);
15128  rtx op = PATTERN (consumer);
15129
15130  if (GET_CODE (value) == COND_EXEC)
15131    value = COND_EXEC_CODE (value);
15132  if (GET_CODE (value) == PARALLEL)
15133    value = XVECEXP (value, 0, 0);
15134  value = XEXP (value, 0);
15135  if (GET_CODE (op) == COND_EXEC)
15136    op = COND_EXEC_CODE (op);
15137  if (GET_CODE (op) == PARALLEL)
15138    op = XVECEXP (op, 0, 0);
15139  op = XEXP (op, 1);
15140
15141  return (GET_CODE (op) == PLUS
15142	  && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
15143}
15144
15145
15146/* We can't rely on the caller doing the proper promotion when
15147   using APCS or ATPCS.  */
15148
15149static bool
15150arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
15151{
  return !TARGET_AAPCS_BASED;
15153}
15154
15155
15156/* AAPCS based ABIs use short enums by default.  */
15157
15158static bool
15159arm_default_short_enums (void)
15160{
15161  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
15162}
15163
15164
15165/* AAPCS requires that anonymous bitfields affect structure alignment.  */
15166
15167static bool
15168arm_align_anon_bitfield (void)
15169{
15170  return TARGET_AAPCS_BASED;
15171}
15172
15173
15174/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
15175
15176static tree
15177arm_cxx_guard_type (void)
15178{
15179  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
15180}
15181
15182
15183/* The EABI says test the least significant bit of a guard variable.  */
15184
15185static bool
15186arm_cxx_guard_mask_bit (void)
15187{
15188  return TARGET_AAPCS_BASED;
15189}
15190
15191
15192/* The EABI specifies that all array cookies are 8 bytes long.  */
15193
15194static tree
15195arm_get_cookie_size (tree type)
15196{
15197  tree size;
15198
15199  if (!TARGET_AAPCS_BASED)
15200    return default_cxx_get_cookie_size (type);
15201
15202  size = build_int_cst (sizetype, 8);
15203  return size;
15204}
15205
15206
15207/* The EABI says that array cookies should also contain the element size.  */
15208
15209static bool
15210arm_cookie_has_size (void)
15211{
15212  return TARGET_AAPCS_BASED;
15213}
15214
15215
15216/* The EABI says constructors and destructors should return a pointer to
15217   the object constructed/destroyed.  */
15218
15219static bool
15220arm_cxx_cdtor_returns_this (void)
15221{
15222  return TARGET_AAPCS_BASED;
15223}
15224
15225/* The EABI says that an inline function may never be the key
15226   method.  */
15227
15228static bool
15229arm_cxx_key_method_may_be_inline (void)
15230{
15231  return !TARGET_AAPCS_BASED;
15232}
15233
15234static void
15235arm_cxx_determine_class_data_visibility (tree decl)
15236{
15237  if (!TARGET_AAPCS_BASED)
15238    return;
15239
15240  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
15241     is exported.  However, on systems without dynamic vague linkage,
15242     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
15243  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
15244    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
15245  else
15246    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
15247  DECL_VISIBILITY_SPECIFIED (decl) = 1;
15248}
15249
15250static bool
15251arm_cxx_class_data_always_comdat (void)
15252{
15253  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
15254     vague linkage if the class has no key function.  */
15255  return !TARGET_AAPCS_BASED;
15256}
15257
15258
15259/* The EABI says __aeabi_atexit should be used to register static
15260   destructors.  */
15261
15262static bool
15263arm_cxx_use_aeabi_atexit (void)
15264{
15265  return TARGET_AAPCS_BASED;
15266}
15267
15268
15269void
15270arm_set_return_address (rtx source, rtx scratch)
15271{
15272  arm_stack_offsets *offsets;
15273  HOST_WIDE_INT delta;
15274  rtx addr;
15275  unsigned long saved_regs;
15276
15277  saved_regs = arm_compute_save_reg_mask ();
15278
15279  if ((saved_regs & (1 << LR_REGNUM)) == 0)
15280    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
15281  else
15282    {
15283      if (frame_pointer_needed)
15284	addr = plus_constant(hard_frame_pointer_rtx, -4);
15285      else
15286	{
15287	  /* LR will be the first saved register.  */
15288	  offsets = arm_get_frame_offsets ();
15289	  delta = offsets->outgoing_args - (offsets->frame + 4);
15290
15292	  if (delta >= 4096)
15293	    {
15294	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
15295				     GEN_INT (delta & ~4095)));
15296	      addr = scratch;
15297	      delta &= 4095;
15298	    }
15299	  else
15300	    addr = stack_pointer_rtx;
15301
15302	  addr = plus_constant (addr, delta);
15303	}
15304      emit_move_insn (gen_frame_mem (Pmode, addr), source);
15305    }
15306}
15307
15308
15309void
15310thumb_set_return_address (rtx source, rtx scratch)
15311{
15312  arm_stack_offsets *offsets;
15313  HOST_WIDE_INT delta;
15314  int reg;
15315  rtx addr;
15316  unsigned long mask;
15317
15318  emit_insn (gen_rtx_USE (VOIDmode, source));
15319
15320  mask = thumb_compute_save_reg_mask ();
15321  if (mask & (1 << LR_REGNUM))
15322    {
15323      offsets = arm_get_frame_offsets ();
15324
15325      /* Find the saved regs.  */
15326      if (frame_pointer_needed)
15327	{
15328	  delta = offsets->soft_frame - offsets->saved_args;
15329	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
15330	}
15331      else
15332	{
15333	  delta = offsets->outgoing_args - offsets->saved_args;
15334	  reg = SP_REGNUM;
15335	}
15336      /* Allow for the stack frame.  */
15337      if (TARGET_BACKTRACE)
15338	delta -= 16;
15339      /* The link register is always the first saved register.  */
15340      delta -= 4;
15341
15342      /* Construct the address.  */
15343      addr = gen_rtx_REG (SImode, reg);
15344      if ((reg != SP_REGNUM && delta >= 128)
15345	  || delta >= 1024)
15346	{
15347	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
15348	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
15349	  addr = scratch;
15350	}
15351      else
15352	addr = plus_constant (addr, delta);
15353
15354      emit_move_insn (gen_frame_mem (Pmode, addr), source);
15355    }
15356  else
15357    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
15358}
15359
15360/* Implements target hook vector_mode_supported_p.  */
15361bool
15362arm_vector_mode_supported_p (enum machine_mode mode)
15363{
15364  if ((mode == V2SImode)
15365      || (mode == V4HImode)
15366      || (mode == V8QImode))
15367    return true;
15368
15369  return false;
15370}
15371
15372/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
15373   ARM insns and therefore guarantee that the shift count is modulo 256.
15374   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
15375   guarantee no particular behavior for out-of-range counts.  */
15376
15377static unsigned HOST_WIDE_INT
15378arm_shift_truncation_mask (enum machine_mode mode)
15379{
15380  return mode == SImode ? 255 : 0;
15381}
15382
15383
15384/* Map internal gcc register numbers to DWARF2 register numbers.  */
15385
15386unsigned int
15387arm_dbx_register_number (unsigned int regno)
15388{
15389  if (regno < 16)
15390    return regno;
15391
15392  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
15393     compatibility.  The EABI defines them as registers 96-103.  */
15394  if (IS_FPA_REGNUM (regno))
15395    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
15396
15397  if (IS_VFP_REGNUM (regno))
15398    return 64 + regno - FIRST_VFP_REGNUM;
15399
15400  if (IS_IWMMXT_GR_REGNUM (regno))
15401    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
15402
15403  if (IS_IWMMXT_REGNUM (regno))
15404    return 112 + regno - FIRST_IWMMXT_REGNUM;
15405
15406  gcc_unreachable ();
15407}
15408
15409
15410#ifdef TARGET_UNWIND_INFO
15411/* Emit unwind directives for a store-multiple instruction.  This should
15412   only ever be generated by the function prologue code, so we expect it
15413   to have a particular form.  */
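/* For instance (illustrative), a prologue push of {r4, r5, lr} is
   expected as
     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
		(set (mem:SI (plus:SI (reg:SI sp) ...)) (reg:SI r4))
		...])
   for which we emit "\t.save {r4, r5, lr}".  */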

static void
arm_unwind_emit_stm (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  /* First insn will adjust the stack pointer.  */
  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
        {
          fprintf (asm_out_file, "\t.pad #4\n");
          offset -= 4;
        }
      reg_size = 4;
    }
  else if (IS_VFP_REGNUM (reg))
    {
      /* VFP register saves use the FSTMX format, which needs one
         additional word beyond the registers themselves.  */
      offset -= 4;
      reg_size = 8;
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  fprintf (asm_out_file, "\t.save {");

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)), where <addr> is (reg:SP)
         or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
          || GET_CODE (XEXP (e, 0)) != MEM
          || GET_CODE (XEXP (e, 1)) != REG)
        abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
        abort ();

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
        {
          offset += reg_size;
          if (GET_CODE (XEXP (e, 0)) != REG
              || REGNO (XEXP (e, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e, 1)) != CONST_INT
              || offset != INTVAL (XEXP (e, 1)))
            abort ();
        }
      else if (i != 1
               || GET_CODE (e) != REG
               || REGNO (e) != SP_REGNUM)
        abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}

/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf (asm_out_file, "{d%d}\n",
                     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;
          unsigned reg;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg, offset);
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
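/* Illustrative examples of the SET forms handled above (assumed, for
   exposition only): "sp = sp - 16" produces ".pad #16"; "fp = sp + 8"
   produces ".setfp fp, sp, #8"; and a single-register push such as
   "str r4, [sp, #-4]!" produces ".save {r4}".  */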


/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_stm (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}


/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */
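/* For example (illustrative only): for a catch clause matching "int",
   X would reference the typeinfo symbol _ZTIi and the emitted line
   would be ".word _ZTIi(TARGET2)".  */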

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
#endif /* TARGET_UNWIND_INFO */


/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    fputs ("\t.fnend\n", f);
}
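/* Illustrative note (not original text): with EABI unwind tables
   enabled, every function body is thus bracketed by ".fnstart" before
   the prologue and ".fnend" after the epilogue, delimiting one unwind
   table entry per function.  */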

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
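/* An illustrative sketch of the output (the label names are invented):
   a general-dynamic reference to symbol "x" would print as

     x(tlsgd) + (. - .LPIC0 - .L1)

   where the parenthesized difference supplies the PC-relative bias the
   TLS access sequence expects.  */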

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
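/* Illustrative example (an assumption, not from the sources): a
   (unspec [(const_int 3)] UNSPEC_PIC_LABEL) operand would print as the
   internal label ".LPIC3" in the assembly output.  */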

#include "gt-arm.h"