1/* Output routines for GCC for ARM.
2   Copyright (C) 1991-2015 Free Software Foundation, Inc.
3   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4   and Martin Simmons (@harleqn.co.uk).
5   More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7   This file is part of GCC.
8
9   GCC is free software; you can redistribute it and/or modify it
10   under the terms of the GNU General Public License as published
11   by the Free Software Foundation; either version 3, or (at your
12   option) any later version.
13
14   GCC is distributed in the hope that it will be useful, but WITHOUT
15   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17   License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with GCC; see the file COPYING3.  If not see
21   <http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "hash-table.h"
27#include "tm.h"
28#include "rtl.h"
29#include "hash-set.h"
30#include "machmode.h"
31#include "vec.h"
32#include "double-int.h"
33#include "input.h"
34#include "alias.h"
35#include "symtab.h"
36#include "wide-int.h"
37#include "inchash.h"
38#include "tree.h"
39#include "fold-const.h"
40#include "stringpool.h"
41#include "stor-layout.h"
42#include "calls.h"
43#include "varasm.h"
44#include "obstack.h"
45#include "regs.h"
46#include "hard-reg-set.h"
47#include "insn-config.h"
48#include "conditions.h"
49#include "output.h"
50#include "insn-attr.h"
51#include "flags.h"
52#include "reload.h"
53#include "function.h"
54#include "hashtab.h"
55#include "statistics.h"
56#include "real.h"
57#include "fixed-value.h"
58#include "expmed.h"
59#include "dojump.h"
60#include "explow.h"
61#include "emit-rtl.h"
62#include "stmt.h"
63#include "expr.h"
64#include "insn-codes.h"
65#include "optabs.h"
66#include "diagnostic-core.h"
67#include "recog.h"
68#include "predict.h"
69#include "dominance.h"
70#include "cfg.h"
71#include "cfgrtl.h"
72#include "cfganal.h"
73#include "lcm.h"
74#include "cfgbuild.h"
75#include "cfgcleanup.h"
76#include "basic-block.h"
77#include "hash-map.h"
78#include "is-a.h"
79#include "plugin-api.h"
80#include "ipa-ref.h"
81#include "cgraph.h"
82#include "ggc.h"
83#include "except.h"
84#include "tm_p.h"
85#include "target.h"
86#include "sched-int.h"
87#include "target-def.h"
88#include "debug.h"
89#include "langhooks.h"
90#include "df.h"
91#include "intl.h"
92#include "libfuncs.h"
93#include "params.h"
94#include "opts.h"
95#include "dumpfile.h"
96#include "gimple-expr.h"
97#include "builtins.h"
98#include "tm-constrs.h"
99#include "rtl-iter.h"
100#include "sched-int.h"
101
102/* Forward definitions of types.  */
103typedef struct minipool_node    Mnode;
104typedef struct minipool_fixup   Mfix;
105
106void (*arm_lang_output_object_attributes_hook)(void);
107
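/* Holds the (up to) four immediate values used by
   optimal_immediate_sequence below when synthesizing a constant from
   a short sequence of data-processing instructions.  */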
108struct four_ints
109{
110  int i[4];
111};
112
113/* Forward function declarations.  */
114static bool arm_const_not_ok_for_debug_p (rtx);
115static bool arm_needs_doubleword_align (machine_mode, const_tree);
116static int arm_compute_static_chain_stack_bytes (void);
117static arm_stack_offsets *arm_get_frame_offsets (void);
118static void arm_add_gc_roots (void);
119static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120			     HOST_WIDE_INT, rtx, rtx, int, int);
121static unsigned bit_count (unsigned long);
122static int arm_address_register_rtx_p (rtx, int);
123static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129inline static int thumb1_index_register_rtx_p (rtx, int);
130static int thumb_far_jump_used_p (void);
131static bool thumb_force_lr_save (void);
132static unsigned arm_size_return_regs (void);
133static bool arm_assemble_integer (rtx, unsigned int, int);
134static void arm_print_operand (FILE *, rtx, int);
135static void arm_print_operand_address (FILE *, rtx);
136static bool arm_print_operand_punct_valid_p (unsigned char code);
137static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138static arm_cc get_arm_condition_code (rtx);
139static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140static const char *output_multi_immediate (rtx *, const char *, const char *,
141					   int, HOST_WIDE_INT);
142static const char *shift_op (rtx, HOST_WIDE_INT *);
143static struct machine_function *arm_init_machine_status (void);
144static void thumb_exit (FILE *, int);
145static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147static Mnode *add_minipool_forward_ref (Mfix *);
148static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149static Mnode *add_minipool_backward_ref (Mfix *);
150static void assign_minipool_offsets (Mfix *);
151static void arm_print_value (FILE *, rtx);
152static void dump_minipool (rtx_insn *);
153static int arm_barrier_cost (rtx);
154static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157			       machine_mode, rtx);
158static void arm_reorg (void);
159static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160static unsigned long arm_compute_save_reg0_reg12_mask (void);
161static unsigned long arm_compute_save_reg_mask (void);
162static unsigned long arm_isr_value (tree);
163static unsigned long arm_compute_func_type (void);
164static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169#endif
170static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172static int arm_comp_type_attributes (const_tree, const_tree);
173static void arm_set_default_type_attributes (tree);
174static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176static int optimal_immediate_sequence (enum rtx_code code,
177				       unsigned HOST_WIDE_INT val,
178				       struct four_ints *return_sequence);
179static int optimal_immediate_sequence_1 (enum rtx_code code,
180					 unsigned HOST_WIDE_INT val,
181					 struct four_ints *return_sequence,
182					 int i);
183static int arm_get_strip_length (int);
184static bool arm_function_ok_for_sibcall (tree, tree);
185static machine_mode arm_promote_function_mode (const_tree,
186						    machine_mode, int *,
187						    const_tree, int);
188static bool arm_return_in_memory (const_tree, const_tree);
189static rtx arm_function_value (const_tree, const_tree, bool);
190static rtx arm_libcall_value_1 (machine_mode);
191static rtx arm_libcall_value (machine_mode, const_rtx);
192static bool arm_function_value_regno_p (const unsigned int);
193static void arm_internal_label (FILE *, const char *, unsigned long);
194static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195				 tree);
196static bool arm_have_conditional_execution (void);
197static bool arm_cannot_force_const_mem (machine_mode, rtx);
198static bool arm_legitimate_constant_p (machine_mode, rtx);
199static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209static void emit_constant_insn (rtx cond, rtx pattern);
210static rtx_insn *emit_set_insn (rtx, rtx);
211static rtx emit_multi_reg_push (unsigned long, unsigned long);
212static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213				  tree, bool);
214static rtx arm_function_arg (cumulative_args_t, machine_mode,
215			     const_tree, bool);
216static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217				      const_tree, bool);
218static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220				      const_tree);
221static rtx aapcs_libcall_value (machine_mode);
222static int aapcs_select_return_coproc (const_tree, const_tree);
223
224#ifdef OBJECT_FORMAT_ELF
225static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227#endif
228#ifndef ARM_PE
229static void arm_encode_section_info (tree, rtx, int);
230#endif
231
232static void arm_file_end (void);
233static void arm_file_start (void);
234
235static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236					tree, int *, int);
237static bool arm_pass_by_reference (cumulative_args_t,
238				   machine_mode, const_tree, bool);
239static bool arm_promote_prototypes (const_tree);
240static bool arm_default_short_enums (void);
241static bool arm_align_anon_bitfield (void);
242static bool arm_return_in_msb (const_tree);
243static bool arm_must_pass_in_stack (machine_mode, const_tree);
244static bool arm_return_in_memory (const_tree, const_tree);
245#if ARM_UNWIND_INFO
246static void arm_unwind_emit (FILE *, rtx_insn *);
247static bool arm_output_ttype (rtx);
248static void arm_asm_emit_except_personality (rtx);
249static void arm_asm_init_sections (void);
250#endif
251static rtx arm_dwarf_register_span (rtx);
252
253static tree arm_cxx_guard_type (void);
254static bool arm_cxx_guard_mask_bit (void);
255static tree arm_get_cookie_size (tree);
256static bool arm_cookie_has_size (void);
257static bool arm_cxx_cdtor_returns_this (void);
258static bool arm_cxx_key_method_may_be_inline (void);
259static void arm_cxx_determine_class_data_visibility (tree);
260static bool arm_cxx_class_data_always_comdat (void);
261static bool arm_cxx_use_aeabi_atexit (void);
262static void arm_init_libfuncs (void);
263static tree arm_build_builtin_va_list (void);
264static void arm_expand_builtin_va_start (tree, rtx);
265static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266static void arm_option_override (void);
267static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268static bool arm_macro_fusion_p (void);
269static bool arm_cannot_copy_insn_p (rtx_insn *);
270static int arm_issue_rate (void);
271static int arm_first_cycle_multipass_dfa_lookahead (void);
272static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274static bool arm_output_addr_const_extra (FILE *, rtx);
275static bool arm_allocate_stack_slots_for_args (void);
276static bool arm_warn_func_return (tree);
277static const char *arm_invalid_parameter_type (const_tree t);
278static const char *arm_invalid_return_type (const_tree t);
279static tree arm_promoted_type (const_tree t);
280static tree arm_convert_to_type (tree type, tree expr);
281static bool arm_scalar_mode_supported_p (machine_mode);
282static bool arm_frame_pointer_required (void);
283static bool arm_can_eliminate (const int, const int);
284static void arm_asm_trampoline_template (FILE *);
285static void arm_trampoline_init (rtx, tree, rtx);
286static rtx arm_trampoline_adjust_address (rtx);
287static rtx arm_pic_static_addr (rtx orig, rtx reg);
288static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291static bool arm_array_mode_supported_p (machine_mode,
292					unsigned HOST_WIDE_INT);
293static machine_mode arm_preferred_simd_mode (machine_mode);
294static bool arm_class_likely_spilled_p (reg_class_t);
295static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298						     const_tree type,
299						     int misalignment,
300						     bool is_packed);
301static void arm_conditional_register_usage (void);
302static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303static unsigned int arm_autovectorize_vector_sizes (void);
304static int arm_default_branch_cost (bool, bool);
305static int arm_cortex_a5_branch_cost (bool, bool);
306static int arm_cortex_m_branch_cost (bool, bool);
307static int arm_cortex_m7_branch_cost (bool, bool);
308
309static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310					     const unsigned char *sel);
311
312static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
313
314static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315					   tree vectype,
316					   int misalign ATTRIBUTE_UNUSED);
317static unsigned arm_add_stmt_cost (void *data, int count,
318				   enum vect_cost_for_stmt kind,
319				   struct _stmt_vec_info *stmt_info,
320				   int misalign,
321				   enum vect_cost_model_location where);
322
323static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324					 bool op0_preserve_value);
325static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
326
327static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
328
329/* Table of machine attributes.  */
330static const struct attribute_spec arm_attribute_table[] =
331{
332  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333       affects_type_identity } */
334  /* Function calls made to this symbol must be done indirectly, because
335     it may lie outside of the 26 bit addressing range of a normal function
336     call.  */
337  { "long_call",    0, 0, false, true,  true,  NULL, false },
338  /* Whereas these functions are always known to reside within the 26 bit
339     addressing range.  */
340  { "short_call",   0, 0, false, true,  true,  NULL, false },
341  /* Specify the procedure call conventions for a function.  */
342  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
343    false },
344  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
345  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
346    false },
347  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
348    false },
349  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
350    false },
351#ifdef ARM_PE
352  /* ARM/PE has three new attributes:
353     interfacearm - ?
354     dllexport - for exporting a function/variable that will live in a dll
355     dllimport - for importing a function/variable from a dll
356
357     Microsoft allows multiple declspecs in one __declspec, separating
358     them with spaces.  We do NOT support this.  Instead, use __declspec
359     multiple times.
360  */
361  { "dllimport",    0, 0, true,  false, false, NULL, false },
362  { "dllexport",    0, 0, true,  false, false, NULL, false },
363  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
364    false },
365#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
367  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
368  { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute,
369    false },
370#endif
371  { NULL,           0, 0, false, false, false, NULL, false }
372};
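
/* Illustrative uses of the attributes above (examples only, not taken
   from any particular source tree), assuming a typical AAPCS target:

     void far_helper (void) __attribute__ ((long_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));

   "long_call" forces an indirect call sequence, "isr"/"interrupt"
   select the interrupt prologue and epilogue, and "pcs" overrides the
   procedure call standard for a single function.  */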
373
374/* Initialize the GCC target structure.  */
375#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376#undef  TARGET_MERGE_DECL_ATTRIBUTES
377#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378#endif
379
380#undef TARGET_LEGITIMIZE_ADDRESS
381#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
382
383#undef TARGET_LRA_P
384#define TARGET_LRA_P hook_bool_void_true
385
386#undef  TARGET_ATTRIBUTE_TABLE
387#define TARGET_ATTRIBUTE_TABLE arm_attribute_table
388
389#undef TARGET_ASM_FILE_START
390#define TARGET_ASM_FILE_START arm_file_start
391#undef TARGET_ASM_FILE_END
392#define TARGET_ASM_FILE_END arm_file_end
393
394#undef  TARGET_ASM_ALIGNED_SI_OP
395#define TARGET_ASM_ALIGNED_SI_OP NULL
396#undef  TARGET_ASM_INTEGER
397#define TARGET_ASM_INTEGER arm_assemble_integer
398
399#undef TARGET_PRINT_OPERAND
400#define TARGET_PRINT_OPERAND arm_print_operand
401#undef TARGET_PRINT_OPERAND_ADDRESS
402#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
405
406#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
408
409#undef  TARGET_ASM_FUNCTION_PROLOGUE
410#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
411
412#undef  TARGET_ASM_FUNCTION_EPILOGUE
413#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
414
415#undef  TARGET_OPTION_OVERRIDE
416#define TARGET_OPTION_OVERRIDE arm_option_override
417
418#undef  TARGET_COMP_TYPE_ATTRIBUTES
419#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
420
421#undef TARGET_SCHED_MACRO_FUSION_P
422#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
423
424#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
426
427#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
429
430#undef  TARGET_SCHED_ADJUST_COST
431#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
432
433#undef TARGET_SCHED_REORDER
434#define TARGET_SCHED_REORDER arm_sched_reorder
435
436#undef TARGET_REGISTER_MOVE_COST
437#define TARGET_REGISTER_MOVE_COST arm_register_move_cost
438
439#undef TARGET_MEMORY_MOVE_COST
440#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
441
442#undef TARGET_ENCODE_SECTION_INFO
443#ifdef ARM_PE
444#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
445#else
446#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
447#endif
448
449#undef  TARGET_STRIP_NAME_ENCODING
450#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
451
452#undef  TARGET_ASM_INTERNAL_LABEL
453#define TARGET_ASM_INTERNAL_LABEL arm_internal_label
454
455#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
456#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
457
458#undef  TARGET_FUNCTION_VALUE
459#define TARGET_FUNCTION_VALUE arm_function_value
460
461#undef  TARGET_LIBCALL_VALUE
462#define TARGET_LIBCALL_VALUE arm_libcall_value
463
464#undef TARGET_FUNCTION_VALUE_REGNO_P
465#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
466
467#undef  TARGET_ASM_OUTPUT_MI_THUNK
468#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
470#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
471
472#undef  TARGET_RTX_COSTS
473#define TARGET_RTX_COSTS arm_rtx_costs
474#undef  TARGET_ADDRESS_COST
475#define TARGET_ADDRESS_COST arm_address_cost
476
477#undef TARGET_SHIFT_TRUNCATION_MASK
478#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479#undef TARGET_VECTOR_MODE_SUPPORTED_P
480#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481#undef TARGET_ARRAY_MODE_SUPPORTED_P
482#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487  arm_autovectorize_vector_sizes
488
489#undef  TARGET_MACHINE_DEPENDENT_REORG
490#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
491
492#undef  TARGET_INIT_BUILTINS
493#define TARGET_INIT_BUILTINS  arm_init_builtins
494#undef  TARGET_EXPAND_BUILTIN
495#define TARGET_EXPAND_BUILTIN arm_expand_builtin
496#undef  TARGET_BUILTIN_DECL
497#define TARGET_BUILTIN_DECL arm_builtin_decl
498
499#undef TARGET_INIT_LIBFUNCS
500#define TARGET_INIT_LIBFUNCS arm_init_libfuncs
501
502#undef TARGET_PROMOTE_FUNCTION_MODE
503#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504#undef TARGET_PROMOTE_PROTOTYPES
505#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506#undef TARGET_PASS_BY_REFERENCE
507#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508#undef TARGET_ARG_PARTIAL_BYTES
509#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510#undef TARGET_FUNCTION_ARG
511#define TARGET_FUNCTION_ARG arm_function_arg
512#undef TARGET_FUNCTION_ARG_ADVANCE
513#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514#undef TARGET_FUNCTION_ARG_BOUNDARY
515#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
516
517#undef  TARGET_SETUP_INCOMING_VARARGS
518#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
519
520#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
522
523#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525#undef TARGET_TRAMPOLINE_INIT
526#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
529
530#undef TARGET_WARN_FUNC_RETURN
531#define TARGET_WARN_FUNC_RETURN arm_warn_func_return
532
533#undef TARGET_DEFAULT_SHORT_ENUMS
534#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
535
536#undef TARGET_ALIGN_ANON_BITFIELD
537#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
538
539#undef TARGET_NARROW_VOLATILE_BITFIELD
540#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
541
542#undef TARGET_CXX_GUARD_TYPE
543#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
544
545#undef TARGET_CXX_GUARD_MASK_BIT
546#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
547
548#undef TARGET_CXX_GET_COOKIE_SIZE
549#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
550
551#undef TARGET_CXX_COOKIE_HAS_SIZE
552#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
553
554#undef TARGET_CXX_CDTOR_RETURNS_THIS
555#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
556
557#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
559
560#undef TARGET_CXX_USE_AEABI_ATEXIT
561#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
562
563#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565  arm_cxx_determine_class_data_visibility
566
567#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
569
570#undef TARGET_RETURN_IN_MSB
571#define TARGET_RETURN_IN_MSB arm_return_in_msb
572
573#undef TARGET_RETURN_IN_MEMORY
574#define TARGET_RETURN_IN_MEMORY arm_return_in_memory
575
576#undef TARGET_MUST_PASS_IN_STACK
577#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
578
579#if ARM_UNWIND_INFO
580#undef TARGET_ASM_UNWIND_EMIT
581#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
582
583/* EABI unwinding tables use a different format for the typeinfo tables.  */
584#undef TARGET_ASM_TTYPE
585#define TARGET_ASM_TTYPE arm_output_ttype
586
587#undef TARGET_ARM_EABI_UNWINDER
588#define TARGET_ARM_EABI_UNWINDER true
589
590#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
592
593#undef TARGET_ASM_INIT_SECTIONS
594#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595#endif /* ARM_UNWIND_INFO */
596
597#undef TARGET_DWARF_REGISTER_SPAN
598#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
599
600#undef  TARGET_CANNOT_COPY_INSN_P
601#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
602
603#ifdef HAVE_AS_TLS
604#undef TARGET_HAVE_TLS
605#define TARGET_HAVE_TLS true
606#endif
607
608#undef TARGET_HAVE_CONDITIONAL_EXECUTION
609#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
610
611#undef TARGET_LEGITIMATE_CONSTANT_P
612#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
613
614#undef TARGET_CANNOT_FORCE_CONST_MEM
615#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
616
617#undef TARGET_MAX_ANCHOR_OFFSET
618#define TARGET_MAX_ANCHOR_OFFSET 4095
619
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
623#undef TARGET_MIN_ANCHOR_OFFSET
624#define TARGET_MIN_ANCHOR_OFFSET -4088
625
626#undef TARGET_SCHED_ISSUE_RATE
627#define TARGET_SCHED_ISSUE_RATE arm_issue_rate
628
629#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631  arm_first_cycle_multipass_dfa_lookahead
632
633#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635  arm_first_cycle_multipass_dfa_lookahead_guard
636
637#undef TARGET_MANGLE_TYPE
638#define TARGET_MANGLE_TYPE arm_mangle_type
639
640#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
642
643#undef TARGET_BUILD_BUILTIN_VA_LIST
644#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645#undef TARGET_EXPAND_BUILTIN_VA_START
646#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647#undef TARGET_GIMPLIFY_VA_ARG_EXPR
648#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
649
650#ifdef HAVE_AS_TLS
651#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653#endif
654
655#undef TARGET_LEGITIMATE_ADDRESS_P
656#define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
657
658#undef TARGET_PREFERRED_RELOAD_CLASS
659#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
660
661#undef TARGET_INVALID_PARAMETER_TYPE
662#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
663
664#undef TARGET_INVALID_RETURN_TYPE
665#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
666
667#undef TARGET_PROMOTED_TYPE
668#define TARGET_PROMOTED_TYPE arm_promoted_type
669
670#undef TARGET_CONVERT_TO_TYPE
671#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
672
673#undef TARGET_SCALAR_MODE_SUPPORTED_P
674#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
675
676#undef TARGET_FRAME_POINTER_REQUIRED
677#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
678
679#undef TARGET_CAN_ELIMINATE
680#define TARGET_CAN_ELIMINATE arm_can_eliminate
681
682#undef TARGET_CONDITIONAL_REGISTER_USAGE
683#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
684
685#undef TARGET_CLASS_LIKELY_SPILLED_P
686#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
687
688#undef TARGET_VECTORIZE_BUILTINS
689#define TARGET_VECTORIZE_BUILTINS
690
691#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693  arm_builtin_vectorized_function
694
695#undef TARGET_VECTOR_ALIGNMENT
696#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
697
698#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700  arm_vector_alignment_reachable
701
702#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704  arm_builtin_support_vector_misalignment
705
706#undef TARGET_PREFERRED_RENAME_CLASS
707#define TARGET_PREFERRED_RENAME_CLASS \
708  arm_preferred_rename_class
709
710#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712  arm_vectorize_vec_perm_const_ok
713
714#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716  arm_builtin_vectorization_cost
717#undef TARGET_VECTORIZE_ADD_STMT_COST
718#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
719
720#undef TARGET_CANONICALIZE_COMPARISON
721#define TARGET_CANONICALIZE_COMPARISON \
722  arm_canonicalize_comparison
723
724#undef TARGET_ASAN_SHADOW_OFFSET
725#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
726
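/* A Thumb-2 IT instruction can cover at most four conditional
   instructions.  When -mrestrict-it is in effect (the behaviour
   preferred on ARMv8), each IT block is restricted to a single,
   preferably 16-bit, instruction, hence the limit of one below.  */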
727#undef MAX_INSN_PER_IT_BLOCK
728#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
729
730#undef TARGET_CAN_USE_DOLOOP_P
731#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
732
733#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
735
736#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
738
739#undef TARGET_SCHED_FUSION_PRIORITY
740#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
741
742struct gcc_target targetm = TARGET_INITIALIZER;
743
744/* Obstack for minipool constant handling.  */
745static struct obstack minipool_obstack;
746static char *         minipool_startobj;
747
748/* The maximum number of insns skipped which
749   will be conditionalised if possible.  */
750static int max_insns_skipped = 5;
751
752extern FILE * asm_out_file;
753
754/* True if we are currently building a constant table.  */
755int making_const_table;
756
757/* The processor for which instructions should be scheduled.  */
758enum processor_type arm_tune = arm_none;
759
760/* The current tuning set.  */
761const struct tune_params *current_tune;
762
763/* Which floating point hardware to schedule for.  */
764int arm_fpu_attr;
765
/* Which floating point hardware to use.  */
767const struct arm_fpu_desc *arm_fpu_desc;
768
769/* Used for Thumb call_via trampolines.  */
770rtx thumb_call_via_label[14];
771static int thumb_call_reg_needed;
772
773/* The bits in this mask specify which
774   instructions we are allowed to generate.  */
775unsigned long insn_flags = 0;
776
777/* The bits in this mask specify which instruction scheduling options should
778   be used.  */
779unsigned long tune_flags = 0;
780
781/* The highest ARM architecture version supported by the
782   target.  */
783enum base_architecture arm_base_arch = BASE_ARCH_0;
784
785/* The following are used in the arm.md file as equivalents to bits
786   in the above two flag variables.  */
787
788/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
789int arm_arch3m = 0;
790
791/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
792int arm_arch4 = 0;
793
794/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
795int arm_arch4t = 0;
796
797/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
798int arm_arch5 = 0;
799
800/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
801int arm_arch5e = 0;
802
803/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
804int arm_arch6 = 0;
805
806/* Nonzero if this chip supports the ARM 6K extensions.  */
807int arm_arch6k = 0;
808
809/* Nonzero if instructions present in ARMv6-M can be used.  */
810int arm_arch6m = 0;
811
812/* Nonzero if this chip supports the ARM 7 extensions.  */
813int arm_arch7 = 0;
814
815/* Nonzero if instructions not present in the 'M' profile can be used.  */
816int arm_arch_notm = 0;
817
818/* Nonzero if instructions present in ARMv7E-M can be used.  */
819int arm_arch7em = 0;
820
821/* Nonzero if instructions present in ARMv8 can be used.  */
822int arm_arch8 = 0;
823
824/* Nonzero if this chip can benefit from load scheduling.  */
825int arm_ld_sched = 0;
826
827/* Nonzero if this chip is a StrongARM.  */
828int arm_tune_strongarm = 0;
829
830/* Nonzero if this chip supports Intel Wireless MMX technology.  */
831int arm_arch_iwmmxt = 0;
832
833/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
834int arm_arch_iwmmxt2 = 0;
835
836/* Nonzero if this chip is an XScale.  */
837int arm_arch_xscale = 0;
838
/* Nonzero if tuning for XScale.  */
840int arm_tune_xscale = 0;
841
842/* Nonzero if we want to tune for stores that access the write-buffer.
843   This typically means an ARM6 or ARM7 with MMU or MPU.  */
844int arm_tune_wbuf = 0;
845
846/* Nonzero if tuning for Cortex-A9.  */
847int arm_tune_cortex_a9 = 0;
848
849/* Nonzero if generating Thumb instructions.  */
850int thumb_code = 0;
851
852/* Nonzero if generating Thumb-1 instructions.  */
853int thumb1_code = 0;
854
855/* Nonzero if we should define __THUMB_INTERWORK__ in the
856   preprocessor.
857   XXX This is a bit of a hack, it's intended to help work around
858   problems in GLD which doesn't understand that armv5t code is
859   interworking clean.  */
860int arm_cpp_interwork = 0;
861
862/* Nonzero if chip supports Thumb 2.  */
863int arm_arch_thumb2;
864
865/* Nonzero if chip supports integer division instruction.  */
866int arm_arch_arm_hwdiv;
867int arm_arch_thumb_hwdiv;
868
869/* Nonzero if chip disallows volatile memory access in IT block.  */
870int arm_arch_no_volatile_ce;
871
/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
874int prefer_neon_for_64bits = 0;
875
876/* Nonzero if we shouldn't use literal pools.  */
877bool arm_disable_literal_pool = false;
878
879/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880   we must report the mode of the memory reference from
881   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
882machine_mode output_memory_reference_mode;
883
884/* The register number to be used for the PIC offset register.  */
885unsigned arm_pic_register = INVALID_REGNUM;
886
887enum arm_pcs arm_pcs_default;
888
889/* For an explanation of these variables, see final_prescan_insn below.  */
890int arm_ccfsm_state;
891/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
892enum arm_cond_code arm_current_cc;
893
894rtx arm_target_insn;
895int arm_target_label;
896/* The number of conditionally executed insns, including the current insn.  */
897int arm_condexec_count = 0;
898/* A bitmask specifying the patterns for the IT block.
899   Zero means do not output an IT block before this insn. */
900int arm_condexec_mask = 0;
901/* The number of bits used in arm_condexec_mask.  */
902int arm_condexec_masklen = 0;
903
904/* Nonzero if chip supports the ARMv8 CRC instructions.  */
905int arm_arch_crc = 0;
906
907/* Nonzero if the core has a very small, high-latency, multiply unit.  */
908int arm_m_profile_small_mul = 0;
909
910/* The condition codes of the ARM, and the inverse function.  */
911static const char * const arm_condition_codes[] =
912{
913  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
914  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
915};
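/* Each code at an even index is paired with its inverse at the
   following odd index (eq/ne, cs/cc, ...), so a condition can be
   inverted simply by flipping the low bit of its index.  */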
916
917/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
918int arm_regs_in_sequence[] =
919{
920  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
921};
922
923#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924#define streq(string1, string2) (strcmp (string1, string2) == 0)
925
926#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
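/* For example, with the Thumb hard frame pointer in r7 and the PIC
   register outside the low registers, this evaluates to 0x7f,
   i.e. r0-r6 are available as work registers (SP and PC already lie
   outside the 0xff low-register mask).  */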
929
930/* Initialization code.  */
931
932struct processors
933{
934  const char *const name;
935  enum processor_type core;
936  const char *arch;
937  enum base_architecture base_arch;
938  const unsigned long flags;
939  const struct tune_params *const tune;
940};
941
942
943#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945  prefetch_slots, \
946  l1_size, \
947  l1_line_size
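
/* These provide the three prefetch-related initializers of a
   tune_params entry: the number of prefetch slots, the L1 cache size
   and the L1 cache line size.  A hypothetical
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) therefore expands to
   "4, 32768, 64", while ARM_PREFETCH_NOT_BENEFICIAL expands to
   "0, -1, -1".  */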
948
949/* arm generic vectorizer costs.  */
950static const
951struct cpu_vec_costs arm_default_vec_cost = {
952  1,					/* scalar_stmt_cost.  */
953  1,					/* scalar load_cost.  */
954  1,					/* scalar_store_cost.  */
955  1,					/* vec_stmt_cost.  */
956  1,					/* vec_to_scalar_cost.  */
957  1,					/* scalar_to_vec_cost.  */
958  1,					/* vec_align_load_cost.  */
959  1,					/* vec_unalign_load_cost.  */
960  1,					/* vec_unalign_store_cost.  */
961  1,					/* vec_store_cost.  */
962  3,					/* cond_taken_branch_cost.  */
963  1,					/* cond_not_taken_branch_cost.  */
964};
965
966/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
967#include "aarch-cost-tables.h"
968
969
970
971const struct cpu_cost_table cortexa9_extra_costs =
972{
973  /* ALU */
974  {
975    0,			/* arith.  */
976    0,			/* logical.  */
977    0,			/* shift.  */
978    COSTS_N_INSNS (1),	/* shift_reg.  */
979    COSTS_N_INSNS (1),	/* arith_shift.  */
980    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
981    0,			/* log_shift.  */
982    COSTS_N_INSNS (1),	/* log_shift_reg.  */
983    COSTS_N_INSNS (1),	/* extend.  */
984    COSTS_N_INSNS (2),	/* extend_arith.  */
985    COSTS_N_INSNS (1),	/* bfi.  */
986    COSTS_N_INSNS (1),	/* bfx.  */
987    0,			/* clz.  */
988    0,			/* rev.  */
989    0,			/* non_exec.  */
990    true		/* non_exec_costs_exec.  */
991  },
992  {
993    /* MULT SImode */
994    {
995      COSTS_N_INSNS (3),	/* simple.  */
996      COSTS_N_INSNS (3),	/* flag_setting.  */
997      COSTS_N_INSNS (2),	/* extend.  */
998      COSTS_N_INSNS (3),	/* add.  */
999      COSTS_N_INSNS (2),	/* extend_add.  */
1000      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1001    },
1002    /* MULT DImode */
1003    {
1004      0,			/* simple (N/A).  */
1005      0,			/* flag_setting (N/A).  */
1006      COSTS_N_INSNS (4),	/* extend.  */
1007      0,			/* add (N/A).  */
1008      COSTS_N_INSNS (4),	/* extend_add.  */
1009      0				/* idiv (N/A).  */
1010    }
1011  },
1012  /* LD/ST */
1013  {
1014    COSTS_N_INSNS (2),	/* load.  */
1015    COSTS_N_INSNS (2),	/* load_sign_extend.  */
1016    COSTS_N_INSNS (2),	/* ldrd.  */
1017    COSTS_N_INSNS (2),	/* ldm_1st.  */
1018    1,			/* ldm_regs_per_insn_1st.  */
1019    2,			/* ldm_regs_per_insn_subsequent.  */
1020    COSTS_N_INSNS (5),	/* loadf.  */
1021    COSTS_N_INSNS (5),	/* loadd.  */
1022    COSTS_N_INSNS (1),  /* load_unaligned.  */
1023    COSTS_N_INSNS (2),	/* store.  */
1024    COSTS_N_INSNS (2),	/* strd.  */
1025    COSTS_N_INSNS (2),	/* stm_1st.  */
1026    1,			/* stm_regs_per_insn_1st.  */
1027    2,			/* stm_regs_per_insn_subsequent.  */
1028    COSTS_N_INSNS (1),	/* storef.  */
1029    COSTS_N_INSNS (1),	/* stored.  */
1030    COSTS_N_INSNS (1)	/* store_unaligned.  */
1031  },
1032  {
1033    /* FP SFmode */
1034    {
1035      COSTS_N_INSNS (14),	/* div.  */
1036      COSTS_N_INSNS (4),	/* mult.  */
1037      COSTS_N_INSNS (7),	/* mult_addsub. */
1038      COSTS_N_INSNS (30),	/* fma.  */
1039      COSTS_N_INSNS (3),	/* addsub.  */
1040      COSTS_N_INSNS (1),	/* fpconst.  */
1041      COSTS_N_INSNS (1),	/* neg.  */
1042      COSTS_N_INSNS (3),	/* compare.  */
1043      COSTS_N_INSNS (3),	/* widen.  */
1044      COSTS_N_INSNS (3),	/* narrow.  */
1045      COSTS_N_INSNS (3),	/* toint.  */
1046      COSTS_N_INSNS (3),	/* fromint.  */
1047      COSTS_N_INSNS (3)		/* roundint.  */
1048    },
1049    /* FP DFmode */
1050    {
1051      COSTS_N_INSNS (24),	/* div.  */
1052      COSTS_N_INSNS (5),	/* mult.  */
1053      COSTS_N_INSNS (8),	/* mult_addsub.  */
1054      COSTS_N_INSNS (30),	/* fma.  */
1055      COSTS_N_INSNS (3),	/* addsub.  */
1056      COSTS_N_INSNS (1),	/* fpconst.  */
1057      COSTS_N_INSNS (1),	/* neg.  */
1058      COSTS_N_INSNS (3),	/* compare.  */
1059      COSTS_N_INSNS (3),	/* widen.  */
1060      COSTS_N_INSNS (3),	/* narrow.  */
1061      COSTS_N_INSNS (3),	/* toint.  */
1062      COSTS_N_INSNS (3),	/* fromint.  */
1063      COSTS_N_INSNS (3)		/* roundint.  */
1064    }
1065  },
1066  /* Vector */
1067  {
1068    COSTS_N_INSNS (1)	/* alu.  */
1069  }
1070};
1071
1072const struct cpu_cost_table cortexa8_extra_costs =
1073{
1074  /* ALU */
1075  {
1076    0,			/* arith.  */
1077    0,			/* logical.  */
1078    COSTS_N_INSNS (1),	/* shift.  */
1079    0,			/* shift_reg.  */
1080    COSTS_N_INSNS (1),	/* arith_shift.  */
1081    0,			/* arith_shift_reg.  */
1082    COSTS_N_INSNS (1),	/* log_shift.  */
1083    0,			/* log_shift_reg.  */
1084    0,			/* extend.  */
1085    0,			/* extend_arith.  */
1086    0,			/* bfi.  */
1087    0,			/* bfx.  */
1088    0,			/* clz.  */
1089    0,			/* rev.  */
1090    0,			/* non_exec.  */
1091    true		/* non_exec_costs_exec.  */
1092  },
1093  {
1094    /* MULT SImode */
1095    {
1096      COSTS_N_INSNS (1),	/* simple.  */
1097      COSTS_N_INSNS (1),	/* flag_setting.  */
1098      COSTS_N_INSNS (1),	/* extend.  */
1099      COSTS_N_INSNS (1),	/* add.  */
1100      COSTS_N_INSNS (1),	/* extend_add.  */
1101      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1102    },
1103    /* MULT DImode */
1104    {
1105      0,			/* simple (N/A).  */
1106      0,			/* flag_setting (N/A).  */
1107      COSTS_N_INSNS (2),	/* extend.  */
1108      0,			/* add (N/A).  */
1109      COSTS_N_INSNS (2),	/* extend_add.  */
1110      0				/* idiv (N/A).  */
1111    }
1112  },
1113  /* LD/ST */
1114  {
1115    COSTS_N_INSNS (1),	/* load.  */
1116    COSTS_N_INSNS (1),	/* load_sign_extend.  */
1117    COSTS_N_INSNS (1),	/* ldrd.  */
1118    COSTS_N_INSNS (1),	/* ldm_1st.  */
1119    1,			/* ldm_regs_per_insn_1st.  */
1120    2,			/* ldm_regs_per_insn_subsequent.  */
1121    COSTS_N_INSNS (1),	/* loadf.  */
1122    COSTS_N_INSNS (1),	/* loadd.  */
1123    COSTS_N_INSNS (1),  /* load_unaligned.  */
1124    COSTS_N_INSNS (1),	/* store.  */
1125    COSTS_N_INSNS (1),	/* strd.  */
1126    COSTS_N_INSNS (1),	/* stm_1st.  */
1127    1,			/* stm_regs_per_insn_1st.  */
1128    2,			/* stm_regs_per_insn_subsequent.  */
1129    COSTS_N_INSNS (1),	/* storef.  */
1130    COSTS_N_INSNS (1),	/* stored.  */
1131    COSTS_N_INSNS (1)	/* store_unaligned.  */
1132  },
1133  {
1134    /* FP SFmode */
1135    {
1136      COSTS_N_INSNS (36),	/* div.  */
1137      COSTS_N_INSNS (11),	/* mult.  */
1138      COSTS_N_INSNS (20),	/* mult_addsub. */
1139      COSTS_N_INSNS (30),	/* fma.  */
1140      COSTS_N_INSNS (9),	/* addsub.  */
1141      COSTS_N_INSNS (3),	/* fpconst.  */
1142      COSTS_N_INSNS (3),	/* neg.  */
1143      COSTS_N_INSNS (6),	/* compare.  */
1144      COSTS_N_INSNS (4),	/* widen.  */
1145      COSTS_N_INSNS (4),	/* narrow.  */
1146      COSTS_N_INSNS (8),	/* toint.  */
1147      COSTS_N_INSNS (8),	/* fromint.  */
1148      COSTS_N_INSNS (8)		/* roundint.  */
1149    },
1150    /* FP DFmode */
1151    {
1152      COSTS_N_INSNS (64),	/* div.  */
1153      COSTS_N_INSNS (16),	/* mult.  */
1154      COSTS_N_INSNS (25),	/* mult_addsub.  */
1155      COSTS_N_INSNS (30),	/* fma.  */
1156      COSTS_N_INSNS (9),	/* addsub.  */
1157      COSTS_N_INSNS (3),	/* fpconst.  */
1158      COSTS_N_INSNS (3),	/* neg.  */
1159      COSTS_N_INSNS (6),	/* compare.  */
1160      COSTS_N_INSNS (6),	/* widen.  */
1161      COSTS_N_INSNS (6),	/* narrow.  */
1162      COSTS_N_INSNS (8),	/* toint.  */
1163      COSTS_N_INSNS (8),	/* fromint.  */
1164      COSTS_N_INSNS (8)		/* roundint.  */
1165    }
1166  },
1167  /* Vector */
1168  {
1169    COSTS_N_INSNS (1)	/* alu.  */
1170  }
1171};
1172
1173const struct cpu_cost_table cortexa5_extra_costs =
1174{
1175  /* ALU */
1176  {
1177    0,			/* arith.  */
1178    0,			/* logical.  */
1179    COSTS_N_INSNS (1),	/* shift.  */
1180    COSTS_N_INSNS (1),	/* shift_reg.  */
1181    COSTS_N_INSNS (1),	/* arith_shift.  */
1182    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1183    COSTS_N_INSNS (1),	/* log_shift.  */
1184    COSTS_N_INSNS (1),	/* log_shift_reg.  */
1185    COSTS_N_INSNS (1),	/* extend.  */
1186    COSTS_N_INSNS (1),	/* extend_arith.  */
1187    COSTS_N_INSNS (1),	/* bfi.  */
1188    COSTS_N_INSNS (1),	/* bfx.  */
1189    COSTS_N_INSNS (1),	/* clz.  */
1190    COSTS_N_INSNS (1),	/* rev.  */
1191    0,			/* non_exec.  */
1192    true		/* non_exec_costs_exec.  */
1193  },
1194
1195  {
1196    /* MULT SImode */
1197    {
1198      0,			/* simple.  */
1199      COSTS_N_INSNS (1),	/* flag_setting.  */
1200      COSTS_N_INSNS (1),	/* extend.  */
1201      COSTS_N_INSNS (1),	/* add.  */
1202      COSTS_N_INSNS (1),	/* extend_add.  */
1203      COSTS_N_INSNS (7)		/* idiv.  */
1204    },
1205    /* MULT DImode */
1206    {
1207      0,			/* simple (N/A).  */
1208      0,			/* flag_setting (N/A).  */
1209      COSTS_N_INSNS (1),	/* extend.  */
1210      0,			/* add.  */
1211      COSTS_N_INSNS (2),	/* extend_add.  */
1212      0				/* idiv (N/A).  */
1213    }
1214  },
1215  /* LD/ST */
1216  {
1217    COSTS_N_INSNS (1),	/* load.  */
1218    COSTS_N_INSNS (1),	/* load_sign_extend.  */
1219    COSTS_N_INSNS (6),	/* ldrd.  */
1220    COSTS_N_INSNS (1),	/* ldm_1st.  */
1221    1,			/* ldm_regs_per_insn_1st.  */
1222    2,			/* ldm_regs_per_insn_subsequent.  */
1223    COSTS_N_INSNS (2),	/* loadf.  */
1224    COSTS_N_INSNS (4),	/* loadd.  */
1225    COSTS_N_INSNS (1),	/* load_unaligned.  */
1226    COSTS_N_INSNS (1),	/* store.  */
1227    COSTS_N_INSNS (3),	/* strd.  */
1228    COSTS_N_INSNS (1),	/* stm_1st.  */
1229    1,			/* stm_regs_per_insn_1st.  */
1230    2,			/* stm_regs_per_insn_subsequent.  */
1231    COSTS_N_INSNS (2),	/* storef.  */
1232    COSTS_N_INSNS (2),	/* stored.  */
1233    COSTS_N_INSNS (1)	/* store_unaligned.  */
1234  },
1235  {
1236    /* FP SFmode */
1237    {
1238      COSTS_N_INSNS (15),	/* div.  */
1239      COSTS_N_INSNS (3),	/* mult.  */
1240      COSTS_N_INSNS (7),	/* mult_addsub. */
1241      COSTS_N_INSNS (7),	/* fma.  */
1242      COSTS_N_INSNS (3),	/* addsub.  */
1243      COSTS_N_INSNS (3),	/* fpconst.  */
1244      COSTS_N_INSNS (3),	/* neg.  */
1245      COSTS_N_INSNS (3),	/* compare.  */
1246      COSTS_N_INSNS (3),	/* widen.  */
1247      COSTS_N_INSNS (3),	/* narrow.  */
1248      COSTS_N_INSNS (3),	/* toint.  */
1249      COSTS_N_INSNS (3),	/* fromint.  */
1250      COSTS_N_INSNS (3)		/* roundint.  */
1251    },
1252    /* FP DFmode */
1253    {
1254      COSTS_N_INSNS (30),	/* div.  */
1255      COSTS_N_INSNS (6),	/* mult.  */
1256      COSTS_N_INSNS (10),	/* mult_addsub.  */
1257      COSTS_N_INSNS (7),	/* fma.  */
1258      COSTS_N_INSNS (3),	/* addsub.  */
1259      COSTS_N_INSNS (3),	/* fpconst.  */
1260      COSTS_N_INSNS (3),	/* neg.  */
1261      COSTS_N_INSNS (3),	/* compare.  */
1262      COSTS_N_INSNS (3),	/* widen.  */
1263      COSTS_N_INSNS (3),	/* narrow.  */
1264      COSTS_N_INSNS (3),	/* toint.  */
1265      COSTS_N_INSNS (3),	/* fromint.  */
1266      COSTS_N_INSNS (3)		/* roundint.  */
1267    }
1268  },
1269  /* Vector */
1270  {
1271    COSTS_N_INSNS (1)	/* alu.  */
1272  }
1273};
1274
1275
1276const struct cpu_cost_table cortexa7_extra_costs =
1277{
1278  /* ALU */
1279  {
1280    0,			/* arith.  */
1281    0,			/* logical.  */
1282    COSTS_N_INSNS (1),	/* shift.  */
1283    COSTS_N_INSNS (1),	/* shift_reg.  */
1284    COSTS_N_INSNS (1),	/* arith_shift.  */
1285    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1286    COSTS_N_INSNS (1),	/* log_shift.  */
1287    COSTS_N_INSNS (1),	/* log_shift_reg.  */
1288    COSTS_N_INSNS (1),	/* extend.  */
1289    COSTS_N_INSNS (1),	/* extend_arith.  */
1290    COSTS_N_INSNS (1),	/* bfi.  */
1291    COSTS_N_INSNS (1),	/* bfx.  */
1292    COSTS_N_INSNS (1),	/* clz.  */
1293    COSTS_N_INSNS (1),	/* rev.  */
1294    0,			/* non_exec.  */
1295    true		/* non_exec_costs_exec.  */
1296  },
1297
1298  {
1299    /* MULT SImode */
1300    {
1301      0,			/* simple.  */
1302      COSTS_N_INSNS (1),	/* flag_setting.  */
1303      COSTS_N_INSNS (1),	/* extend.  */
1304      COSTS_N_INSNS (1),	/* add.  */
1305      COSTS_N_INSNS (1),	/* extend_add.  */
1306      COSTS_N_INSNS (7)		/* idiv.  */
1307    },
1308    /* MULT DImode */
1309    {
1310      0,			/* simple (N/A).  */
1311      0,			/* flag_setting (N/A).  */
1312      COSTS_N_INSNS (1),	/* extend.  */
1313      0,			/* add.  */
1314      COSTS_N_INSNS (2),	/* extend_add.  */
1315      0				/* idiv (N/A).  */
1316    }
1317  },
1318  /* LD/ST */
1319  {
1320    COSTS_N_INSNS (1),	/* load.  */
1321    COSTS_N_INSNS (1),	/* load_sign_extend.  */
1322    COSTS_N_INSNS (3),	/* ldrd.  */
1323    COSTS_N_INSNS (1),	/* ldm_1st.  */
1324    1,			/* ldm_regs_per_insn_1st.  */
1325    2,			/* ldm_regs_per_insn_subsequent.  */
1326    COSTS_N_INSNS (2),	/* loadf.  */
1327    COSTS_N_INSNS (2),	/* loadd.  */
1328    COSTS_N_INSNS (1),	/* load_unaligned.  */
1329    COSTS_N_INSNS (1),	/* store.  */
1330    COSTS_N_INSNS (3),	/* strd.  */
1331    COSTS_N_INSNS (1),	/* stm_1st.  */
1332    1,			/* stm_regs_per_insn_1st.  */
1333    2,			/* stm_regs_per_insn_subsequent.  */
1334    COSTS_N_INSNS (2),	/* storef.  */
1335    COSTS_N_INSNS (2),	/* stored.  */
1336    COSTS_N_INSNS (1)	/* store_unaligned.  */
1337  },
1338  {
1339    /* FP SFmode */
1340    {
1341      COSTS_N_INSNS (15),	/* div.  */
1342      COSTS_N_INSNS (3),	/* mult.  */
1343      COSTS_N_INSNS (7),	/* mult_addsub. */
1344      COSTS_N_INSNS (7),	/* fma.  */
1345      COSTS_N_INSNS (3),	/* addsub.  */
1346      COSTS_N_INSNS (3),	/* fpconst.  */
1347      COSTS_N_INSNS (3),	/* neg.  */
1348      COSTS_N_INSNS (3),	/* compare.  */
1349      COSTS_N_INSNS (3),	/* widen.  */
1350      COSTS_N_INSNS (3),	/* narrow.  */
1351      COSTS_N_INSNS (3),	/* toint.  */
1352      COSTS_N_INSNS (3),	/* fromint.  */
1353      COSTS_N_INSNS (3)		/* roundint.  */
1354    },
1355    /* FP DFmode */
1356    {
1357      COSTS_N_INSNS (30),	/* div.  */
1358      COSTS_N_INSNS (6),	/* mult.  */
1359      COSTS_N_INSNS (10),	/* mult_addsub.  */
1360      COSTS_N_INSNS (7),	/* fma.  */
1361      COSTS_N_INSNS (3),	/* addsub.  */
1362      COSTS_N_INSNS (3),	/* fpconst.  */
1363      COSTS_N_INSNS (3),	/* neg.  */
1364      COSTS_N_INSNS (3),	/* compare.  */
1365      COSTS_N_INSNS (3),	/* widen.  */
1366      COSTS_N_INSNS (3),	/* narrow.  */
1367      COSTS_N_INSNS (3),	/* toint.  */
1368      COSTS_N_INSNS (3),	/* fromint.  */
1369      COSTS_N_INSNS (3)		/* roundint.  */
1370    }
1371  },
1372  /* Vector */
1373  {
1374    COSTS_N_INSNS (1)	/* alu.  */
1375  }
1376};
1377
1378const struct cpu_cost_table cortexa12_extra_costs =
1379{
1380  /* ALU */
1381  {
1382    0,			/* arith.  */
1383    0,			/* logical.  */
1384    0,			/* shift.  */
1385    COSTS_N_INSNS (1),	/* shift_reg.  */
1386    COSTS_N_INSNS (1),	/* arith_shift.  */
1387    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1388    COSTS_N_INSNS (1),	/* log_shift.  */
1389    COSTS_N_INSNS (1),	/* log_shift_reg.  */
1390    0,			/* extend.  */
1391    COSTS_N_INSNS (1),	/* extend_arith.  */
1392    0,			/* bfi.  */
1393    COSTS_N_INSNS (1),	/* bfx.  */
1394    COSTS_N_INSNS (1),	/* clz.  */
1395    COSTS_N_INSNS (1),	/* rev.  */
1396    0,			/* non_exec.  */
1397    true		/* non_exec_costs_exec.  */
1398  },
1399  /* MULT SImode */
1400  {
1401    {
1402      COSTS_N_INSNS (2),	/* simple.  */
1403      COSTS_N_INSNS (3),	/* flag_setting.  */
1404      COSTS_N_INSNS (2),	/* extend.  */
1405      COSTS_N_INSNS (3),	/* add.  */
1406      COSTS_N_INSNS (2),	/* extend_add.  */
1407      COSTS_N_INSNS (18)	/* idiv.  */
1408    },
1409    /* MULT DImode */
1410    {
1411      0,			/* simple (N/A).  */
1412      0,			/* flag_setting (N/A).  */
1413      COSTS_N_INSNS (3),	/* extend.  */
1414      0,			/* add (N/A).  */
1415      COSTS_N_INSNS (3),	/* extend_add.  */
1416      0				/* idiv (N/A).  */
1417    }
1418  },
1419  /* LD/ST */
1420  {
1421    COSTS_N_INSNS (3),	/* load.  */
1422    COSTS_N_INSNS (3),	/* load_sign_extend.  */
1423    COSTS_N_INSNS (3),	/* ldrd.  */
1424    COSTS_N_INSNS (3),	/* ldm_1st.  */
1425    1,			/* ldm_regs_per_insn_1st.  */
1426    2,			/* ldm_regs_per_insn_subsequent.  */
1427    COSTS_N_INSNS (3),	/* loadf.  */
1428    COSTS_N_INSNS (3),	/* loadd.  */
1429    0,			/* load_unaligned.  */
1430    0,			/* store.  */
1431    0,			/* strd.  */
1432    0,			/* stm_1st.  */
1433    1,			/* stm_regs_per_insn_1st.  */
1434    2,			/* stm_regs_per_insn_subsequent.  */
1435    COSTS_N_INSNS (2),	/* storef.  */
1436    COSTS_N_INSNS (2),	/* stored.  */
1437    0			/* store_unaligned.  */
1438  },
1439  {
1440    /* FP SFmode */
1441    {
1442      COSTS_N_INSNS (17),	/* div.  */
1443      COSTS_N_INSNS (4),	/* mult.  */
1444      COSTS_N_INSNS (8),	/* mult_addsub. */
1445      COSTS_N_INSNS (8),	/* fma.  */
1446      COSTS_N_INSNS (4),	/* addsub.  */
1447      COSTS_N_INSNS (2),	/* fpconst. */
1448      COSTS_N_INSNS (2),	/* neg.  */
1449      COSTS_N_INSNS (2),	/* compare.  */
1450      COSTS_N_INSNS (4),	/* widen.  */
1451      COSTS_N_INSNS (4),	/* narrow.  */
1452      COSTS_N_INSNS (4),	/* toint.  */
1453      COSTS_N_INSNS (4),	/* fromint.  */
1454      COSTS_N_INSNS (4)		/* roundint.  */
1455    },
1456    /* FP DFmode */
1457    {
1458      COSTS_N_INSNS (31),	/* div.  */
1459      COSTS_N_INSNS (4),	/* mult.  */
1460      COSTS_N_INSNS (8),	/* mult_addsub.  */
1461      COSTS_N_INSNS (8),	/* fma.  */
1462      COSTS_N_INSNS (4),	/* addsub.  */
1463      COSTS_N_INSNS (2),	/* fpconst.  */
1464      COSTS_N_INSNS (2),	/* neg.  */
1465      COSTS_N_INSNS (2),	/* compare.  */
1466      COSTS_N_INSNS (4),	/* widen.  */
1467      COSTS_N_INSNS (4),	/* narrow.  */
1468      COSTS_N_INSNS (4),	/* toint.  */
1469      COSTS_N_INSNS (4),	/* fromint.  */
1470      COSTS_N_INSNS (4)		/* roundint.  */
1471    }
1472  },
1473  /* Vector */
1474  {
1475    COSTS_N_INSNS (1)	/* alu.  */
1476  }
1477};
1478
1479const struct cpu_cost_table cortexa15_extra_costs =
1480{
1481  /* ALU */
1482  {
1483    0,			/* arith.  */
1484    0,			/* logical.  */
1485    0,			/* shift.  */
1486    0,			/* shift_reg.  */
1487    COSTS_N_INSNS (1),	/* arith_shift.  */
1488    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1489    COSTS_N_INSNS (1),	/* log_shift.  */
1490    COSTS_N_INSNS (1),	/* log_shift_reg.  */
1491    0,			/* extend.  */
1492    COSTS_N_INSNS (1),	/* extend_arith.  */
1493    COSTS_N_INSNS (1),	/* bfi.  */
1494    0,			/* bfx.  */
1495    0,			/* clz.  */
1496    0,			/* rev.  */
1497    0,			/* non_exec.  */
1498    true		/* non_exec_costs_exec.  */
1499  },
1500  /* MULT SImode */
1501  {
1502    {
1503      COSTS_N_INSNS (2),	/* simple.  */
1504      COSTS_N_INSNS (3),	/* flag_setting.  */
1505      COSTS_N_INSNS (2),	/* extend.  */
1506      COSTS_N_INSNS (2),	/* add.  */
1507      COSTS_N_INSNS (2),	/* extend_add.  */
1508      COSTS_N_INSNS (18)	/* idiv.  */
1509    },
1510    /* MULT DImode */
1511    {
1512      0,			/* simple (N/A).  */
1513      0,			/* flag_setting (N/A).  */
1514      COSTS_N_INSNS (3),	/* extend.  */
1515      0,			/* add (N/A).  */
1516      COSTS_N_INSNS (3),	/* extend_add.  */
1517      0				/* idiv (N/A).  */
1518    }
1519  },
1520  /* LD/ST */
1521  {
1522    COSTS_N_INSNS (3),	/* load.  */
1523    COSTS_N_INSNS (3),	/* load_sign_extend.  */
1524    COSTS_N_INSNS (3),	/* ldrd.  */
1525    COSTS_N_INSNS (4),	/* ldm_1st.  */
1526    1,			/* ldm_regs_per_insn_1st.  */
1527    2,			/* ldm_regs_per_insn_subsequent.  */
1528    COSTS_N_INSNS (4),	/* loadf.  */
1529    COSTS_N_INSNS (4),	/* loadd.  */
1530    0,			/* load_unaligned.  */
1531    0,			/* store.  */
1532    0,			/* strd.  */
1533    COSTS_N_INSNS (1),	/* stm_1st.  */
1534    1,			/* stm_regs_per_insn_1st.  */
1535    2,			/* stm_regs_per_insn_subsequent.  */
1536    0,			/* storef.  */
1537    0,			/* stored.  */
1538    0			/* store_unaligned.  */
1539  },
1540  {
1541    /* FP SFmode */
1542    {
1543      COSTS_N_INSNS (17),	/* div.  */
1544      COSTS_N_INSNS (4),	/* mult.  */
1545      COSTS_N_INSNS (8),	/* mult_addsub. */
1546      COSTS_N_INSNS (8),	/* fma.  */
1547      COSTS_N_INSNS (4),	/* addsub.  */
1548      COSTS_N_INSNS (2),	/* fpconst. */
1549      COSTS_N_INSNS (2),	/* neg.  */
1550      COSTS_N_INSNS (5),	/* compare.  */
1551      COSTS_N_INSNS (4),	/* widen.  */
1552      COSTS_N_INSNS (4),	/* narrow.  */
1553      COSTS_N_INSNS (4),	/* toint.  */
1554      COSTS_N_INSNS (4),	/* fromint.  */
1555      COSTS_N_INSNS (4)		/* roundint.  */
1556    },
1557    /* FP DFmode */
1558    {
1559      COSTS_N_INSNS (31),	/* div.  */
1560      COSTS_N_INSNS (4),	/* mult.  */
1561      COSTS_N_INSNS (8),	/* mult_addsub.  */
1562      COSTS_N_INSNS (8),	/* fma.  */
1563      COSTS_N_INSNS (4),	/* addsub.  */
1564      COSTS_N_INSNS (2),	/* fpconst.  */
1565      COSTS_N_INSNS (2),	/* neg.  */
1566      COSTS_N_INSNS (2),	/* compare.  */
1567      COSTS_N_INSNS (4),	/* widen.  */
1568      COSTS_N_INSNS (4),	/* narrow.  */
1569      COSTS_N_INSNS (4),	/* toint.  */
1570      COSTS_N_INSNS (4),	/* fromint.  */
1571      COSTS_N_INSNS (4)		/* roundint.  */
1572    }
1573  },
1574  /* Vector */
1575  {
1576    COSTS_N_INSNS (1)	/* alu.  */
1577  }
1578};
1579
1580const struct cpu_cost_table v7m_extra_costs =
1581{
1582  /* ALU */
1583  {
1584    0,			/* arith.  */
1585    0,			/* logical.  */
1586    0,			/* shift.  */
1587    0,			/* shift_reg.  */
1588    0,			/* arith_shift.  */
1589    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1590    0,			/* log_shift.  */
1591    COSTS_N_INSNS (1),	/* log_shift_reg.  */
1592    0,			/* extend.  */
1593    COSTS_N_INSNS (1),	/* extend_arith.  */
1594    0,			/* bfi.  */
1595    0,			/* bfx.  */
1596    0,			/* clz.  */
1597    0,			/* rev.  */
1598    COSTS_N_INSNS (1),	/* non_exec.  */
1599    false		/* non_exec_costs_exec.  */
1600  },
1601  {
1602    /* MULT SImode */
1603    {
1604      COSTS_N_INSNS (1),	/* simple.  */
1605      COSTS_N_INSNS (1),	/* flag_setting.  */
1606      COSTS_N_INSNS (2),	/* extend.  */
1607      COSTS_N_INSNS (1),	/* add.  */
1608      COSTS_N_INSNS (3),	/* extend_add.  */
1609      COSTS_N_INSNS (8)		/* idiv.  */
1610    },
1611    /* MULT DImode */
1612    {
1613      0,			/* simple (N/A).  */
1614      0,			/* flag_setting (N/A).  */
1615      COSTS_N_INSNS (2),	/* extend.  */
1616      0,			/* add (N/A).  */
1617      COSTS_N_INSNS (3),	/* extend_add.  */
1618      0				/* idiv (N/A).  */
1619    }
1620  },
1621  /* LD/ST */
1622  {
1623    COSTS_N_INSNS (2),	/* load.  */
1624    0,			/* load_sign_extend.  */
1625    COSTS_N_INSNS (3),	/* ldrd.  */
1626    COSTS_N_INSNS (2),	/* ldm_1st.  */
1627    1,			/* ldm_regs_per_insn_1st.  */
1628    1,			/* ldm_regs_per_insn_subsequent.  */
1629    COSTS_N_INSNS (2),	/* loadf.  */
1630    COSTS_N_INSNS (3),	/* loadd.  */
1631    COSTS_N_INSNS (1),  /* load_unaligned.  */
1632    COSTS_N_INSNS (2),	/* store.  */
1633    COSTS_N_INSNS (3),	/* strd.  */
1634    COSTS_N_INSNS (2),	/* stm_1st.  */
1635    1,			/* stm_regs_per_insn_1st.  */
1636    1,			/* stm_regs_per_insn_subsequent.  */
1637    COSTS_N_INSNS (2),	/* storef.  */
1638    COSTS_N_INSNS (3),	/* stored.  */
1639    COSTS_N_INSNS (1)  /* store_unaligned.  */
1640  },
1641  {
1642    /* FP SFmode */
1643    {
1644      COSTS_N_INSNS (7),	/* div.  */
1645      COSTS_N_INSNS (2),	/* mult.  */
1646      COSTS_N_INSNS (5),	/* mult_addsub.  */
1647      COSTS_N_INSNS (3),	/* fma.  */
1648      COSTS_N_INSNS (1),	/* addsub.  */
1649      0,			/* fpconst.  */
1650      0,			/* neg.  */
1651      0,			/* compare.  */
1652      0,			/* widen.  */
1653      0,			/* narrow.  */
1654      0,			/* toint.  */
1655      0,			/* fromint.  */
1656      0				/* roundint.  */
1657    },
1658    /* FP DFmode */
1659    {
1660      COSTS_N_INSNS (15),	/* div.  */
1661      COSTS_N_INSNS (5),	/* mult.  */
1662      COSTS_N_INSNS (7),	/* mult_addsub.  */
1663      COSTS_N_INSNS (7),	/* fma.  */
1664      COSTS_N_INSNS (3),	/* addsub.  */
1665      0,			/* fpconst.  */
1666      0,			/* neg.  */
1667      0,			/* compare.  */
1668      0,			/* widen.  */
1669      0,			/* narrow.  */
1670      0,			/* toint.  */
1671      0,			/* fromint.  */
1672      0				/* roundint.  */
1673    }
1674  },
1675  /* Vector */
1676  {
1677    COSTS_N_INSNS (1)	/* alu.  */
1678  }
1679};
1680
1681#define ARM_FUSE_NOTHING	(0)
1682#define ARM_FUSE_MOVW_MOVT	(1 << 0)
1683
1684const struct tune_params arm_slowmul_tune =
1685{
1686  arm_slowmul_rtx_costs,
1687  NULL,
1688  NULL,						/* Sched adj cost.  */
1689  3,						/* Constant limit.  */
1690  5,						/* Max cond insns.  */
1691  ARM_PREFETCH_NOT_BENEFICIAL,
1692  true,						/* Prefer constant pool.  */
1693  arm_default_branch_cost,
1694  false,					/* Prefer LDRD/STRD.  */
1695  {true, true},					/* Prefer non short circuit.  */
1696  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1697  false,                                        /* Prefer Neon for 64-bits bitops.  */
1698  false, false,                                 /* Prefer 32-bit encodings.  */
1699  false,					/* Prefer Neon for stringops.  */
1700  8,						/* Maximum insns to inline memset.  */
1701  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1702  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1703};
1704
1705const struct tune_params arm_fastmul_tune =
1706{
1707  arm_fastmul_rtx_costs,
1708  NULL,
1709  NULL,						/* Sched adj cost.  */
1710  1,						/* Constant limit.  */
1711  5,						/* Max cond insns.  */
1712  ARM_PREFETCH_NOT_BENEFICIAL,
1713  true,						/* Prefer constant pool.  */
1714  arm_default_branch_cost,
1715  false,					/* Prefer LDRD/STRD.  */
1716  {true, true},					/* Prefer non short circuit.  */
1717  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1718  false,                                        /* Prefer Neon for 64-bits bitops.  */
1719  false, false,                                 /* Prefer 32-bit encodings.  */
1720  false,					/* Prefer Neon for stringops.  */
1721  8,						/* Maximum insns to inline memset.  */
1722  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1723  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1724};
1725
1726/* StrongARM has early execution of branches, so a sequence that is worth
1727   skipping is shorter.  Set max_insns_skipped to a lower value.  */
1728
1729const struct tune_params arm_strongarm_tune =
1730{
1731  arm_fastmul_rtx_costs,
1732  NULL,
1733  NULL,						/* Sched adj cost.  */
1734  1,						/* Constant limit.  */
1735  3,						/* Max cond insns.  */
1736  ARM_PREFETCH_NOT_BENEFICIAL,
1737  true,						/* Prefer constant pool.  */
1738  arm_default_branch_cost,
1739  false,					/* Prefer LDRD/STRD.  */
1740  {true, true},					/* Prefer non short circuit.  */
1741  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1742  false,                                        /* Prefer Neon for 64-bits bitops.  */
1743  false, false,                                 /* Prefer 32-bit encodings.  */
1744  false,					/* Prefer Neon for stringops.  */
1745  8,						/* Maximum insns to inline memset.  */
1746  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1747  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1748};
1749
1750const struct tune_params arm_xscale_tune =
1751{
1752  arm_xscale_rtx_costs,
1753  NULL,
1754  xscale_sched_adjust_cost,
1755  2,						/* Constant limit.  */
1756  3,						/* Max cond insns.  */
1757  ARM_PREFETCH_NOT_BENEFICIAL,
1758  true,						/* Prefer constant pool.  */
1759  arm_default_branch_cost,
1760  false,					/* Prefer LDRD/STRD.  */
1761  {true, true},					/* Prefer non short circuit.  */
1762  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1763  false,                                        /* Prefer Neon for 64-bits bitops.  */
1764  false, false,                                 /* Prefer 32-bit encodings.  */
1765  false,					/* Prefer Neon for stringops.  */
1766  8,						/* Maximum insns to inline memset.  */
1767  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1768  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1769};
1770
1771const struct tune_params arm_9e_tune =
1772{
1773  arm_9e_rtx_costs,
1774  NULL,
1775  NULL,						/* Sched adj cost.  */
1776  1,						/* Constant limit.  */
1777  5,						/* Max cond insns.  */
1778  ARM_PREFETCH_NOT_BENEFICIAL,
1779  true,						/* Prefer constant pool.  */
1780  arm_default_branch_cost,
1781  false,					/* Prefer LDRD/STRD.  */
1782  {true, true},					/* Prefer non short circuit.  */
1783  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1784  false,                                        /* Prefer Neon for 64-bits bitops.  */
1785  false, false,                                 /* Prefer 32-bit encodings.  */
1786  false,					/* Prefer Neon for stringops.  */
1787  8,						/* Maximum insns to inline memset.  */
1788  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1789  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1790};
1791
1792const struct tune_params arm_v6t2_tune =
1793{
1794  arm_9e_rtx_costs,
1795  NULL,
1796  NULL,						/* Sched adj cost.  */
1797  1,						/* Constant limit.  */
1798  5,						/* Max cond insns.  */
1799  ARM_PREFETCH_NOT_BENEFICIAL,
1800  false,					/* Prefer constant pool.  */
1801  arm_default_branch_cost,
1802  false,					/* Prefer LDRD/STRD.  */
1803  {true, true},					/* Prefer non short circuit.  */
1804  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1805  false,                                        /* Prefer Neon for 64-bits bitops.  */
1806  false, false,                                 /* Prefer 32-bit encodings.  */
1807  false,					/* Prefer Neon for stringops.  */
1808  8,						/* Maximum insns to inline memset.  */
1809  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1810  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1811};
1812
1813/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1814const struct tune_params arm_cortex_tune =
1815{
1816  arm_9e_rtx_costs,
1817  &generic_extra_costs,
1818  NULL,						/* Sched adj cost.  */
1819  1,						/* Constant limit.  */
1820  5,						/* Max cond insns.  */
1821  ARM_PREFETCH_NOT_BENEFICIAL,
1822  false,					/* Prefer constant pool.  */
1823  arm_default_branch_cost,
1824  false,					/* Prefer LDRD/STRD.  */
1825  {true, true},					/* Prefer non short circuit.  */
1826  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1827  false,                                        /* Prefer Neon for 64-bits bitops.  */
1828  false, false,                                 /* Prefer 32-bit encodings.  */
1829  false,					/* Prefer Neon for stringops.  */
1830  8,						/* Maximum insns to inline memset.  */
1831  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1832  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1833};
1834
1835const struct tune_params arm_cortex_a8_tune =
1836{
1837  arm_9e_rtx_costs,
1838  &cortexa8_extra_costs,
1839  NULL,						/* Sched adj cost.  */
1840  1,						/* Constant limit.  */
1841  5,						/* Max cond insns.  */
1842  ARM_PREFETCH_NOT_BENEFICIAL,
1843  false,					/* Prefer constant pool.  */
1844  arm_default_branch_cost,
1845  false,					/* Prefer LDRD/STRD.  */
1846  {true, true},					/* Prefer non short circuit.  */
1847  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1848  false,                                        /* Prefer Neon for 64-bits bitops.  */
1849  false, false,                                 /* Prefer 32-bit encodings.  */
1850  true,						/* Prefer Neon for stringops.  */
1851  8,						/* Maximum insns to inline memset.  */
1852  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1853  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1854};
1855
1856const struct tune_params arm_cortex_a7_tune =
1857{
1858  arm_9e_rtx_costs,
1859  &cortexa7_extra_costs,
1860  NULL,
1861  1,						/* Constant limit.  */
1862  5,						/* Max cond insns.  */
1863  ARM_PREFETCH_NOT_BENEFICIAL,
1864  false,					/* Prefer constant pool.  */
1865  arm_default_branch_cost,
1866  false,					/* Prefer LDRD/STRD.  */
1867  {true, true},					/* Prefer non short circuit.  */
1868  &arm_default_vec_cost,			/* Vectorizer costs.  */
1869  false,					/* Prefer Neon for 64-bits bitops.  */
1870  false, false,                                 /* Prefer 32-bit encodings.  */
1871  true,						/* Prefer Neon for stringops.  */
1872  8,						/* Maximum insns to inline memset.  */
1873  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1874  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1875};
1876
1877const struct tune_params arm_cortex_a15_tune =
1878{
1879  arm_9e_rtx_costs,
1880  &cortexa15_extra_costs,
1881  NULL,						/* Sched adj cost.  */
1882  1,						/* Constant limit.  */
1883  2,						/* Max cond insns.  */
1884  ARM_PREFETCH_NOT_BENEFICIAL,
1885  false,					/* Prefer constant pool.  */
1886  arm_default_branch_cost,
1887  true,						/* Prefer LDRD/STRD.  */
1888  {true, true},					/* Prefer non short circuit.  */
1889  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1890  false,                                        /* Prefer Neon for 64-bits bitops.  */
1891  true, true,                                   /* Prefer 32-bit encodings.  */
1892  true,						/* Prefer Neon for stringops.  */
1893  8,						/* Maximum insns to inline memset.  */
1894  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1895  ARM_SCHED_AUTOPREF_FULL			/* Sched L2 autopref.  */
1896};
1897
1898const struct tune_params arm_cortex_a53_tune =
1899{
1900  arm_9e_rtx_costs,
1901  &cortexa53_extra_costs,
1902  NULL,						/* Scheduler cost adjustment.  */
1903  1,						/* Constant limit.  */
1904  5,						/* Max cond insns.  */
1905  ARM_PREFETCH_NOT_BENEFICIAL,
1906  false,					/* Prefer constant pool.  */
1907  arm_default_branch_cost,
1908  false,					/* Prefer LDRD/STRD.  */
1909  {true, true},					/* Prefer non short circuit.  */
1910  &arm_default_vec_cost,			/* Vectorizer costs.  */
1911  false,					/* Prefer Neon for 64-bits bitops.  */
1912  false, false,                                 /* Prefer 32-bit encodings.  */
1913  true,						/* Prefer Neon for stringops.  */
1914  8,						/* Maximum insns to inline memset.  */
1915  ARM_FUSE_MOVW_MOVT,				/* Fuseable pairs of instructions.  */
1916  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1917};
1918
1919const struct tune_params arm_cortex_a57_tune =
1920{
1921  arm_9e_rtx_costs,
1922  &cortexa57_extra_costs,
1923  NULL,                                         /* Scheduler cost adjustment.  */
1924  1,                                           /* Constant limit.  */
1925  2,                                           /* Max cond insns.  */
1926  ARM_PREFETCH_NOT_BENEFICIAL,
1927  false,                                       /* Prefer constant pool.  */
1928  arm_default_branch_cost,
1929  true,                                       /* Prefer LDRD/STRD.  */
1930  {true, true},                                /* Prefer non short circuit.  */
1931  &arm_default_vec_cost,                       /* Vectorizer costs.  */
1932  false,                                       /* Prefer Neon for 64-bits bitops.  */
1933  true, true,                                  /* Prefer 32-bit encodings.  */
1934  true,						/* Prefer Neon for stringops.  */
1935  8,						/* Maximum insns to inline memset.  */
1936  ARM_FUSE_MOVW_MOVT,				/* Fuseable pairs of instructions.  */
1937  ARM_SCHED_AUTOPREF_FULL			/* Sched L2 autopref.  */
1938};
1939
1940const struct tune_params arm_xgene1_tune =
1941{
1942  arm_9e_rtx_costs,
1943  &xgene1_extra_costs,
1944  NULL,                                        /* Scheduler cost adjustment.  */
1945  1,                                           /* Constant limit.  */
1946  2,                                           /* Max cond insns.  */
1947  ARM_PREFETCH_NOT_BENEFICIAL,
1948  false,                                       /* Prefer constant pool.  */
1949  arm_default_branch_cost,
1950  true,                                        /* Prefer LDRD/STRD.  */
1951  {true, true},                                /* Prefer non short circuit.  */
1952  &arm_default_vec_cost,                       /* Vectorizer costs.  */
1953  false,                                       /* Prefer Neon for 64-bits bitops.  */
1954  true, true,                                  /* Prefer 32-bit encodings.  */
1955  false,				       /* Prefer Neon for stringops.  */
1956  32,					       /* Maximum insns to inline memset.  */
1957  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1958  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1959};
1960
1961/* Branches can be dual-issued on Cortex-A5, so conditional execution is
1962   less appealing.  Set max_insns_skipped to a low value.  */
1963
1964const struct tune_params arm_cortex_a5_tune =
1965{
1966  arm_9e_rtx_costs,
1967  &cortexa5_extra_costs,
1968  NULL,						/* Sched adj cost.  */
1969  1,						/* Constant limit.  */
1970  1,						/* Max cond insns.  */
1971  ARM_PREFETCH_NOT_BENEFICIAL,
1972  false,					/* Prefer constant pool.  */
1973  arm_cortex_a5_branch_cost,
1974  false,					/* Prefer LDRD/STRD.  */
1975  {false, false},				/* Prefer non short circuit.  */
1976  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1977  false,                                        /* Prefer Neon for 64-bits bitops.  */
1978  false, false,                                 /* Prefer 32-bit encodings.  */
1979  true,						/* Prefer Neon for stringops.  */
1980  8,						/* Maximum insns to inline memset.  */
1981  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
1982  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
1983};
1984
1985const struct tune_params arm_cortex_a9_tune =
1986{
1987  arm_9e_rtx_costs,
1988  &cortexa9_extra_costs,
1989  cortex_a9_sched_adjust_cost,
1990  1,						/* Constant limit.  */
1991  5,						/* Max cond insns.  */
1992  ARM_PREFETCH_BENEFICIAL(4,32,32),
1993  false,					/* Prefer constant pool.  */
1994  arm_default_branch_cost,
1995  false,					/* Prefer LDRD/STRD.  */
1996  {true, true},					/* Prefer non short circuit.  */
1997  &arm_default_vec_cost,                        /* Vectorizer costs.  */
1998  false,                                        /* Prefer Neon for 64-bits bitops.  */
1999  false, false,                                 /* Prefer 32-bit encodings.  */
2000  false,					/* Prefer Neon for stringops.  */
2001  8,						/* Maximum insns to inline memset.  */
2002  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
2003  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2004};
2005
2006const struct tune_params arm_cortex_a12_tune =
2007{
2008  arm_9e_rtx_costs,
2009  &cortexa12_extra_costs,
2010  NULL,						/* Sched adj cost.  */
2011  1,						/* Constant limit.  */
2012  2,						/* Max cond insns.  */
2013  ARM_PREFETCH_NOT_BENEFICIAL,
2014  false,					/* Prefer constant pool.  */
2015  arm_default_branch_cost,
2016  true,						/* Prefer LDRD/STRD.  */
2017  {true, true},					/* Prefer non short circuit.  */
2018  &arm_default_vec_cost,                        /* Vectorizer costs.  */
2019  false,                                        /* Prefer Neon for 64-bits bitops.  */
2020  true, true,                                   /* Prefer 32-bit encodings.  */
2021  true,						/* Prefer Neon for stringops.  */
2022  8,						/* Maximum insns to inline memset.  */
2023  ARM_FUSE_MOVW_MOVT,				/* Fuseable pairs of instructions.  */
2024  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2025};
2026
2027/* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take one
2028   cycle, so materialising a constant with the pair costs two cycles.  An LDR
2029   from the constant pool likewise takes two cycles to execute, but mildly
2030   increases pipelining opportunity (consecutive loads/stores can be pipelined
2031   together, saving one cycle), and may also improve icache utilisation.
2032   Hence we prefer the constant pool for such processors.  */
2033
2034const struct tune_params arm_v7m_tune =
2035{
2036  arm_9e_rtx_costs,
2037  &v7m_extra_costs,
2038  NULL,						/* Sched adj cost.  */
2039  1,						/* Constant limit.  */
2040  2,						/* Max cond insns.  */
2041  ARM_PREFETCH_NOT_BENEFICIAL,
2042  true,						/* Prefer constant pool.  */
2043  arm_cortex_m_branch_cost,
2044  false,					/* Prefer LDRD/STRD.  */
2045  {false, false},				/* Prefer non short circuit.  */
2046  &arm_default_vec_cost,                        /* Vectorizer costs.  */
2047  false,                                        /* Prefer Neon for 64-bits bitops.  */
2048  false, false,                                 /* Prefer 32-bit encodings.  */
2049  false,					/* Prefer Neon for stringops.  */
2050  8,						/* Maximum insns to inline memset.  */
2051  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
2052  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2053};
2054
2055/* Cortex-M7 tuning.  */
2056
2057const struct tune_params arm_cortex_m7_tune =
2058{
2059  arm_9e_rtx_costs,
2060  &v7m_extra_costs,
2061  NULL,						/* Sched adj cost.  */
2062  0,						/* Constant limit.  */
2063  1,						/* Max cond insns.  */
2064  ARM_PREFETCH_NOT_BENEFICIAL,
2065  true,						/* Prefer constant pool.  */
2066  arm_cortex_m7_branch_cost,
2067  false,					/* Prefer LDRD/STRD.  */
2068  {true, true},					/* Prefer non short circuit.  */
2069  &arm_default_vec_cost,                        /* Vectorizer costs.  */
2070  false,                                        /* Prefer Neon for 64-bits bitops.  */
2071  false, false,                                 /* Prefer 32-bit encodings.  */
2072  false,					/* Prefer Neon for stringops.  */
2073  8,						/* Maximum insns to inline memset.  */
2074  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
2075  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2076};
2077
2078/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2079   arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
2080const struct tune_params arm_v6m_tune =
2081{
2082  arm_9e_rtx_costs,
2083  NULL,
2084  NULL,						/* Sched adj cost.  */
2085  1,						/* Constant limit.  */
2086  5,						/* Max cond insns.  */
2087  ARM_PREFETCH_NOT_BENEFICIAL,
2088  false,					/* Prefer constant pool.  */
2089  arm_default_branch_cost,
2090  false,					/* Prefer LDRD/STRD.  */
2091  {false, false},				/* Prefer non short circuit.  */
2092  &arm_default_vec_cost,                        /* Vectorizer costs.  */
2093  false,                                        /* Prefer Neon for 64-bits bitops.  */
2094  false, false,                                 /* Prefer 32-bit encodings.  */
2095  false,					/* Prefer Neon for stringops.  */
2096  8,						/* Maximum insns to inline memset.  */
2097  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
2098  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2099};
2100
2101const struct tune_params arm_fa726te_tune =
2102{
2103  arm_9e_rtx_costs,
2104  NULL,
2105  fa726te_sched_adjust_cost,
2106  1,						/* Constant limit.  */
2107  5,						/* Max cond insns.  */
2108  ARM_PREFETCH_NOT_BENEFICIAL,
2109  true,						/* Prefer constant pool.  */
2110  arm_default_branch_cost,
2111  false,					/* Prefer LDRD/STRD.  */
2112  {true, true},					/* Prefer non short circuit.  */
2113  &arm_default_vec_cost,                        /* Vectorizer costs.  */
2114  false,                                        /* Prefer Neon for 64-bits bitops.  */
2115  false, false,                                 /* Prefer 32-bit encodings.  */
2116  false,					/* Prefer Neon for stringops.  */
2117  8,						/* Maximum insns to inline memset.  */
2118  ARM_FUSE_NOTHING,				/* Fuseable pairs of instructions.  */
2119  ARM_SCHED_AUTOPREF_OFF			/* Sched L2 autopref.  */
2120};
2121
2122
2123/* Not all of these give usefully different compilation alternatives,
2124   but there is no simple way of generalizing them.  */
2125static const struct processors all_cores[] =
2126{
2127  /* ARM Cores */
2128#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2129  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH,	  \
2130    FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2131#include "arm-cores.def"
2132#undef ARM_CORE
2133  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2134};
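
/* For illustration only (the real entries live in arm-cores.def): a
   hypothetical entry such as
     ARM_CORE ("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED, cortex_a15)
   would expand via the macro above to
     {"cortex-a15", cortexa15, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a15_tune},
   i.e. the architecture name is stringized, the architecture's baseline
   feature flags are OR-ed in, and the COSTS argument selects one of the
   arm_*_tune structures defined above.  */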
2135
2136static const struct processors all_architectures[] =
2137{
2138  /* ARM Architectures */
2139  /* We don't specify tuning costs here as they will be figured out
2140     from the core.  */
2141
2142#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2143  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2144#include "arm-arches.def"
2145#undef ARM_ARCH
2146  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2147};
2148
2149
2150/* These are populated as command-line arguments are processed, or remain
2151   NULL if not specified.  */
2152static const struct processors *arm_selected_arch;
2153static const struct processors *arm_selected_cpu;
2154static const struct processors *arm_selected_tune;
2155
2156/* The name of the preprocessor macro to define for this architecture.  */
2157
2158char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2159
2160/* Available values for -mfpu=.  */
2161
2162static const struct arm_fpu_desc all_fpus[] =
2163{
2164#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2165  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2166#include "arm-fpus.def"
2167#undef ARM_FPU
2168};
2169
2170
2171/* Supported TLS relocations.  */
2172
2173enum tls_reloc {
2174  TLS_GD32,
2175  TLS_LDM32,
2176  TLS_LDO32,
2177  TLS_IE32,
2178  TLS_LE32,
2179  TLS_DESCSEQ	/* GNU scheme */
2180};
2181
2182/* The maximum number of insns to be used when loading a constant.  */
2183inline static int
2184arm_constant_limit (bool size_p)
2185{
2186  return size_p ? 1 : current_tune->constant_limit;
2187}
2188
2189/* Emit an insn that's a simple single-set.  Both operands must be known
2190   to be valid.  */
2191inline static rtx_insn *
2192emit_set_insn (rtx x, rtx y)
2193{
2194  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2195}
2196
2197/* Return the number of bits set in VALUE.  */
2198static unsigned
2199bit_count (unsigned long value)
2200{
2201  unsigned long count = 0;
2202
2203  while (value)
2204    {
2205      count++;
2206      value &= value - 1;  /* Clear the least-significant set bit.  */
2207    }
2208
2209  return count;
2210}
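
/* For example, bit_count (0x29) clears one set bit per iteration:
   0x29 -> 0x28 -> 0x20 -> 0, so the loop runs three times and the
   function returns 3.  */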
2211
2212typedef struct
2213{
2214  machine_mode mode;
2215  const char *name;
2216} arm_fixed_mode_set;
2217
2218/* A small helper for setting fixed-point libfuncs.  */
2219
2220static void
2221arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2222			     const char *funcname, const char *modename,
2223			     int num_suffix)
2224{
2225  char buffer[50];
2226
2227  if (num_suffix == 0)
2228    sprintf (buffer, "__gnu_%s%s", funcname, modename);
2229  else
2230    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2231
2232  set_optab_libfunc (optable, mode, buffer);
2233}
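
/* As an illustration of the naming scheme above, a call such as
     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);
   registers "__gnu_addsq3" as the SQmode addition libcall, while a
   num_suffix of 0 would simply drop the trailing operand count.  */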
2234
2235static void
2236arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2237			    machine_mode from, const char *funcname,
2238			    const char *toname, const char *fromname)
2239{
2240  char buffer[50];
2241  const char *maybe_suffix_2 = "";
2242
2243  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2244  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2245      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2246      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2247    maybe_suffix_2 = "2";
2248
2249  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2250	   maybe_suffix_2);
2251
2252  set_conv_libfunc (optable, to, from, buffer);
2253}
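
/* Likewise for conversions.  The loops in arm_init_libfuncs below end up
   making calls such as
     arm_set_fixed_conv_libfunc (fract_optab, SQmode, SFmode,
				 "fract", "sq", "sf");
   which registers "__gnu_fractsfsq" (FROM name before TO name) for the
   SFmode-to-SQmode conversion.  The "2" suffix is only appended when both
   modes are fixed-point with matching signedness and fract-ness, giving
   names such as "__gnu_fractqqhq2".  */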
2254
2255/* Set up library functions unique to ARM.  */
2256
2257static void
2258arm_init_libfuncs (void)
2259{
2260  /* For Linux, we have access to kernel support for atomic operations.  */
2261  if (arm_abi == ARM_ABI_AAPCS_LINUX)
2262    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2263
2264  /* There are no special library functions unless we are using the
2265     ARM BPABI.  */
2266  if (!TARGET_BPABI)
2267    return;
2268
2269  /* The functions below are described in Section 4 of the "Run-Time
2270     ABI for the ARM architecture", Version 1.0.  */
2271
2272  /* Double-precision floating-point arithmetic.  Table 2.  */
2273  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2274  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2275  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2276  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2277  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2278
2279  /* Double-precision comparisons.  Table 3.  */
2280  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2281  set_optab_libfunc (ne_optab, DFmode, NULL);
2282  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2283  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2284  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2285  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2286  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2287
2288  /* Single-precision floating-point arithmetic.  Table 4.  */
2289  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2290  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2291  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2292  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2293  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2294
2295  /* Single-precision comparisons.  Table 5.  */
2296  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2297  set_optab_libfunc (ne_optab, SFmode, NULL);
2298  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2299  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2300  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2301  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2302  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2303
2304  /* Floating-point to integer conversions.  Table 6.  */
2305  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2306  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2307  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2308  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2309  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2310  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2311  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2312  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2313
2314  /* Conversions between floating types.  Table 7.  */
2315  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2316  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2317
2318  /* Integer to floating-point conversions.  Table 8.  */
2319  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2320  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2321  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2322  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2323  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2324  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2325  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2326  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2327
2328  /* Long long.  Table 9.  */
2329  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2330  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2331  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2332  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2333  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2334  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2335  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2336  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2337
2338  /* Integer (32/32->32) division.  \S 4.3.1.  */
2339  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2340  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2341
2342  /* The divmod functions are designed so that they can be used for
2343     plain division, even though they return both the quotient and the
2344     remainder.  The quotient is returned in the usual location (i.e.,
2345     r0 for SImode, {r0, r1} for DImode), just as would be expected
2346     for an ordinary division routine.  Because the AAPCS calling
2347     conventions specify that all of { r0, r1, r2, r3 } are
2348     call-clobbered registers, there is no need to tell the compiler
2349     explicitly that those registers are clobbered by these
2350     routines.  */
2351  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2352  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2353
2354  /* For SImode division the ABI provides div-without-mod routines,
2355     which are faster.  */
2356  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2357  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2358
2359  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2360     divmod libcalls instead.  */
2361  set_optab_libfunc (smod_optab, DImode, NULL);
2362  set_optab_libfunc (umod_optab, DImode, NULL);
2363  set_optab_libfunc (smod_optab, SImode, NULL);
2364  set_optab_libfunc (umod_optab, SImode, NULL);
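
  /* So a plain SImode "r = a % b" is expected to expand into a call to
     "__aeabi_idivmod"; per the Run-time ABI the quotient comes back in r0
     and the remainder in r1, and the compiler just uses the remainder half
     of the pair.  (The register assignment is documented AEABI behaviour,
     not something enforced in this file.)  */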
2365
2366  /* Half-precision float operations.  The compiler handles all operations
2367     with NULL libfuncs by converting to SFmode.  */
2368  switch (arm_fp16_format)
2369    {
2370    case ARM_FP16_FORMAT_IEEE:
2371    case ARM_FP16_FORMAT_ALTERNATIVE:
2372
2373      /* Conversions.  */
2374      set_conv_libfunc (trunc_optab, HFmode, SFmode,
2375			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2376			 ? "__gnu_f2h_ieee"
2377			 : "__gnu_f2h_alternative"));
2378      set_conv_libfunc (sext_optab, SFmode, HFmode,
2379			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2380			 ? "__gnu_h2f_ieee"
2381			 : "__gnu_h2f_alternative"));
2382
2383      /* Arithmetic.  */
2384      set_optab_libfunc (add_optab, HFmode, NULL);
2385      set_optab_libfunc (sdiv_optab, HFmode, NULL);
2386      set_optab_libfunc (smul_optab, HFmode, NULL);
2387      set_optab_libfunc (neg_optab, HFmode, NULL);
2388      set_optab_libfunc (sub_optab, HFmode, NULL);
2389
2390      /* Comparisons.  */
2391      set_optab_libfunc (eq_optab, HFmode, NULL);
2392      set_optab_libfunc (ne_optab, HFmode, NULL);
2393      set_optab_libfunc (lt_optab, HFmode, NULL);
2394      set_optab_libfunc (le_optab, HFmode, NULL);
2395      set_optab_libfunc (ge_optab, HFmode, NULL);
2396      set_optab_libfunc (gt_optab, HFmode, NULL);
2397      set_optab_libfunc (unord_optab, HFmode, NULL);
2398      break;
2399
2400    default:
2401      break;
2402    }
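
  /* With one of the formats above selected, an assignment such as
       __fp16 h = f;
     (where f has type float) is expected to become a call to
     "__gnu_f2h_ieee" or "__gnu_f2h_alternative", while HFmode arithmetic
     and comparisons keep NULL libfuncs and are therefore performed by
     widening to SFmode first.  */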
2403
2404  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2405  {
2406    const arm_fixed_mode_set fixed_arith_modes[] =
2407      {
2408	{ QQmode, "qq" },
2409	{ UQQmode, "uqq" },
2410	{ HQmode, "hq" },
2411	{ UHQmode, "uhq" },
2412	{ SQmode, "sq" },
2413	{ USQmode, "usq" },
2414	{ DQmode, "dq" },
2415	{ UDQmode, "udq" },
2416	{ TQmode, "tq" },
2417	{ UTQmode, "utq" },
2418	{ HAmode, "ha" },
2419	{ UHAmode, "uha" },
2420	{ SAmode, "sa" },
2421	{ USAmode, "usa" },
2422	{ DAmode, "da" },
2423	{ UDAmode, "uda" },
2424	{ TAmode, "ta" },
2425	{ UTAmode, "uta" }
2426      };
2427    const arm_fixed_mode_set fixed_conv_modes[] =
2428      {
2429	{ QQmode, "qq" },
2430	{ UQQmode, "uqq" },
2431	{ HQmode, "hq" },
2432	{ UHQmode, "uhq" },
2433	{ SQmode, "sq" },
2434	{ USQmode, "usq" },
2435	{ DQmode, "dq" },
2436	{ UDQmode, "udq" },
2437	{ TQmode, "tq" },
2438	{ UTQmode, "utq" },
2439	{ HAmode, "ha" },
2440	{ UHAmode, "uha" },
2441	{ SAmode, "sa" },
2442	{ USAmode, "usa" },
2443	{ DAmode, "da" },
2444	{ UDAmode, "uda" },
2445	{ TAmode, "ta" },
2446	{ UTAmode, "uta" },
2447	{ QImode, "qi" },
2448	{ HImode, "hi" },
2449	{ SImode, "si" },
2450	{ DImode, "di" },
2451	{ TImode, "ti" },
2452	{ SFmode, "sf" },
2453	{ DFmode, "df" }
2454      };
2455    unsigned int i, j;
2456
2457    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2458      {
2459	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2460				     "add", fixed_arith_modes[i].name, 3);
2461	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2462				     "ssadd", fixed_arith_modes[i].name, 3);
2463	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2464				     "usadd", fixed_arith_modes[i].name, 3);
2465	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2466				     "sub", fixed_arith_modes[i].name, 3);
2467	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2468				     "sssub", fixed_arith_modes[i].name, 3);
2469	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2470				     "ussub", fixed_arith_modes[i].name, 3);
2471	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2472				     "mul", fixed_arith_modes[i].name, 3);
2473	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2474				     "ssmul", fixed_arith_modes[i].name, 3);
2475	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2476				     "usmul", fixed_arith_modes[i].name, 3);
2477	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2478				     "div", fixed_arith_modes[i].name, 3);
2479	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2480				     "udiv", fixed_arith_modes[i].name, 3);
2481	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2482				     "ssdiv", fixed_arith_modes[i].name, 3);
2483	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2484				     "usdiv", fixed_arith_modes[i].name, 3);
2485	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2486				     "neg", fixed_arith_modes[i].name, 2);
2487	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2488				     "ssneg", fixed_arith_modes[i].name, 2);
2489	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2490				     "usneg", fixed_arith_modes[i].name, 2);
2491	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2492				     "ashl", fixed_arith_modes[i].name, 3);
2493	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2494				     "ashr", fixed_arith_modes[i].name, 3);
2495	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2496				     "lshr", fixed_arith_modes[i].name, 3);
2497	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2498				     "ssashl", fixed_arith_modes[i].name, 3);
2499	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2500				     "usashl", fixed_arith_modes[i].name, 3);
2501	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2502				     "cmp", fixed_arith_modes[i].name, 2);
2503      }
2504
2505    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2506      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2507	{
2508	  if (i == j
2509	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2510		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2511	    continue;
2512
2513	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2514				      fixed_conv_modes[j].mode, "fract",
2515				      fixed_conv_modes[i].name,
2516				      fixed_conv_modes[j].name);
2517	  arm_set_fixed_conv_libfunc (satfract_optab,
2518				      fixed_conv_modes[i].mode,
2519				      fixed_conv_modes[j].mode, "satfract",
2520				      fixed_conv_modes[i].name,
2521				      fixed_conv_modes[j].name);
2522	  arm_set_fixed_conv_libfunc (fractuns_optab,
2523				      fixed_conv_modes[i].mode,
2524				      fixed_conv_modes[j].mode, "fractuns",
2525				      fixed_conv_modes[i].name,
2526				      fixed_conv_modes[j].name);
2527	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2528				      fixed_conv_modes[i].mode,
2529				      fixed_conv_modes[j].mode, "satfractuns",
2530				      fixed_conv_modes[i].name,
2531				      fixed_conv_modes[j].name);
2532	}
2533  }
2534
2535  if (TARGET_AAPCS_BASED)
2536    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2537}
2538
2539/* On AAPCS systems, this is the "struct __va_list".  */
2540static GTY(()) tree va_list_type;
2541
2542/* Return the type to use as __builtin_va_list.  */
2543static tree
2544arm_build_builtin_va_list (void)
2545{
2546  tree va_list_name;
2547  tree ap_field;
2548
2549  if (!TARGET_AAPCS_BASED)
2550    return std_build_builtin_va_list ();
2551
2552  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2553     defined as:
2554
2555       struct __va_list
2556       {
2557	 void *__ap;
2558       };
2559
2560     The C Library ABI further reinforces this definition in \S
2561     4.1.
2562
2563     We must follow this definition exactly.  The structure tag
2564     name is visible in C++ mangled names, and thus forms a part
2565     of the ABI.  The field name may be used by people who
2566     #include <stdarg.h>.  */
2567  /* Create the type.  */
2568  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2569  /* Give it the required name.  */
2570  va_list_name = build_decl (BUILTINS_LOCATION,
2571			     TYPE_DECL,
2572			     get_identifier ("__va_list"),
2573			     va_list_type);
2574  DECL_ARTIFICIAL (va_list_name) = 1;
2575  TYPE_NAME (va_list_type) = va_list_name;
2576  TYPE_STUB_DECL (va_list_type) = va_list_name;
2577  /* Create the __ap field.  */
2578  ap_field = build_decl (BUILTINS_LOCATION,
2579			 FIELD_DECL,
2580			 get_identifier ("__ap"),
2581			 ptr_type_node);
2582  DECL_ARTIFICIAL (ap_field) = 1;
2583  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2584  TYPE_FIELDS (va_list_type) = ap_field;
2585  /* Compute its layout.  */
2586  layout_type (va_list_type);
2587
2588  return va_list_type;
2589}
2590
2591/* Return an expression of type "void *" pointing to the next
2592   available argument in a variable-argument list.  VALIST is the
2593   user-level va_list object, of type __builtin_va_list.  */
2594static tree
2595arm_extract_valist_ptr (tree valist)
2596{
2597  if (TREE_TYPE (valist) == error_mark_node)
2598    return error_mark_node;
2599
2600  /* On an AAPCS target, the pointer is stored within "struct
2601     va_list".  */
2602  if (TARGET_AAPCS_BASED)
2603    {
2604      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2605      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2606		       valist, ap_field, NULL_TREE);
2607    }
2608
2609  return valist;
2610}
2611
2612/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2613static void
2614arm_expand_builtin_va_start (tree valist, rtx nextarg)
2615{
2616  valist = arm_extract_valist_ptr (valist);
2617  std_expand_builtin_va_start (valist, nextarg);
2618}
2619
2620/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2621static tree
2622arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2623			  gimple_seq *post_p)
2624{
2625  valist = arm_extract_valist_ptr (valist);
2626  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2627}
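
/* To make the AAPCS handling above concrete: in a variadic function such as
     int sum (int n, ...)
   the user-visible va_list object is the one-field structure built by
   arm_build_builtin_va_list, and the two hooks above simply strip it down
   to its "__ap" pointer so that the standard (pointer-bumping) va_start
   and va_arg expansions can be reused unchanged.  */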
2628
2629/* Fix up any incompatible options that the user has specified.  */
2630static void
2631arm_option_override (void)
2632{
2633  arm_selected_arch = NULL;
2634  arm_selected_cpu = NULL;
2635  arm_selected_tune = NULL;
2636
2637  if (global_options_set.x_arm_arch_option)
2638    arm_selected_arch = &all_architectures[arm_arch_option];
2639
2640  if (global_options_set.x_arm_cpu_option)
2641    {
2642      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2643      arm_selected_tune = &all_cores[(int) arm_cpu_option];
2644    }
2645
2646  if (global_options_set.x_arm_tune_option)
2647    arm_selected_tune = &all_cores[(int) arm_tune_option];
2648
2649#ifdef SUBTARGET_OVERRIDE_OPTIONS
2650  SUBTARGET_OVERRIDE_OPTIONS;
2651#endif
2652
2653  if (arm_selected_arch)
2654    {
2655      if (arm_selected_cpu)
2656	{
2657	  /* Check for conflict between mcpu and march.  */
2658	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2659	    {
2660	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2661		       arm_selected_cpu->name, arm_selected_arch->name);
2662	      /* -march wins for code generation.
2663	         -mcpu wins for default tuning.  */
2664	      if (!arm_selected_tune)
2665		arm_selected_tune = arm_selected_cpu;
2666
2667	      arm_selected_cpu = arm_selected_arch;
2668	    }
2669	  else
2670	    /* -mcpu wins.  */
2671	    arm_selected_arch = NULL;
2672	}
2673      else
2674	/* Pick a CPU based on the architecture.  */
2675	arm_selected_cpu = arm_selected_arch;
2676    }
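
  /* For example, "-mcpu=cortex-m3 -march=armv7-a" would trigger the warning
     above since the two imply different feature flags; code would then be
     generated for armv7-a while cortex-m3 would still provide the default
     tuning (unless -mtune was also given).  */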
2677
2678  /* If the user did not specify a processor, choose one for them.  */
2679  if (!arm_selected_cpu)
2680    {
2681      const struct processors * sel;
2682      unsigned int        sought;
2683
2684      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2685      if (!arm_selected_cpu->name)
2686	{
2687#ifdef SUBTARGET_CPU_DEFAULT
2688	  /* Use the subtarget default CPU if none was specified by
2689	     configure.  */
2690	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2691#endif
2692	  /* Default to ARM6.  */
2693	  if (!arm_selected_cpu->name)
2694	    arm_selected_cpu = &all_cores[arm6];
2695	}
2696
2697      sel = arm_selected_cpu;
2698      insn_flags = sel->flags;
2699
2700      /* Now check to see if the user has specified some command line
2701	 switches that require certain abilities from the CPU.  */
2702      sought = 0;
2703
2704      if (TARGET_INTERWORK || TARGET_THUMB)
2705	{
2706	  sought |= (FL_THUMB | FL_MODE32);
2707
2708	  /* There are no ARM processors that support both APCS-26 and
2709	     interworking.  Therefore we force FL_MODE26 to be removed
2710	     from insn_flags here (if it was set), so that the search
2711	     below will always be able to find a compatible processor.  */
2712	  insn_flags &= ~FL_MODE26;
2713	}
2714
2715      if (sought != 0 && ((sought & insn_flags) != sought))
2716	{
2717	  /* Try to locate a CPU type that supports all of the abilities
2718	     of the default CPU, plus the extra abilities requested by
2719	     the user.  */
2720	  for (sel = all_cores; sel->name != NULL; sel++)
2721	    if ((sel->flags & sought) == (sought | insn_flags))
2722	      break;
2723
2724	  if (sel->name == NULL)
2725	    {
2726	      unsigned current_bit_count = 0;
2727	      const struct processors * best_fit = NULL;
2728
2729	      /* Ideally we would like to issue an error message here
2730		 saying that it was not possible to find a CPU compatible
2731		 with the default CPU, but which also supports the command
2732		 line options specified by the programmer, and so they
2733		 ought to use the -mcpu=<name> command line option to
2734		 override the default CPU type.
2735
2736		 If we cannot find a cpu that has both the
2737		 characteristics of the default cpu and the given
2738		 command line options we scan the array again looking
2739		 for a best match.  */
2740	      for (sel = all_cores; sel->name != NULL; sel++)
2741		if ((sel->flags & sought) == sought)
2742		  {
2743		    unsigned count;
2744
2745		    count = bit_count (sel->flags & insn_flags);
2746
2747		    if (count >= current_bit_count)
2748		      {
2749			best_fit = sel;
2750			current_bit_count = count;
2751		      }
2752		  }
2753
2754	      gcc_assert (best_fit);
2755	      sel = best_fit;
2756	    }
2757
2758	  arm_selected_cpu = sel;
2759	}
2760    }
2761
2762  gcc_assert (arm_selected_cpu);
2763  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
2764  if (!arm_selected_tune)
2765    arm_selected_tune = &all_cores[arm_selected_cpu->core];
2766
2767  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2768  insn_flags = arm_selected_cpu->flags;
2769  arm_base_arch = arm_selected_cpu->base_arch;
2770
2771  arm_tune = arm_selected_tune->core;
2772  tune_flags = arm_selected_tune->flags;
2773  current_tune = arm_selected_tune->tune;
2774
2775  /* Make sure that the processor choice does not conflict with any of the
2776     other command line choices.  */
2777  if (TARGET_ARM && !(insn_flags & FL_NOTM))
2778    error ("target CPU does not support ARM mode");
2779
2780  /* BPABI targets use linker tricks to allow interworking on cores
2781     without thumb support.  */
2782  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2783    {
2784      warning (0, "target CPU does not support interworking" );
2785      target_flags &= ~MASK_INTERWORK;
2786    }
2787
2788  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2789    {
2790      warning (0, "target CPU does not support THUMB instructions");
2791      target_flags &= ~MASK_THUMB;
2792    }
2793
2794  if (TARGET_APCS_FRAME && TARGET_THUMB)
2795    {
2796      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2797      target_flags &= ~MASK_APCS_FRAME;
2798    }
2799
2800  /* Callee super interworking implies thumb interworking.  Adding
2801     this to the flags here simplifies the logic elsewhere.  */
2802  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2803    target_flags |= MASK_INTERWORK;
2804
2805  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2806     from here where no function is being compiled currently.  */
2807  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2808    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2809
2810  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2811    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2812
2813  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2814    {
2815      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2816      target_flags |= MASK_APCS_FRAME;
2817    }
2818
2819  if (TARGET_POKE_FUNCTION_NAME)
2820    target_flags |= MASK_APCS_FRAME;
2821
2822  if (TARGET_APCS_REENT && flag_pic)
2823    error ("-fpic and -mapcs-reent are incompatible");
2824
2825  if (TARGET_APCS_REENT)
2826    warning (0, "APCS reentrant code not supported.  Ignored");
2827
2828  /* If this target is normally configured to use APCS frames, warn if they
2829     are turned off and debugging is turned on.  */
2830  if (TARGET_ARM
2831      && write_symbols != NO_DEBUG
2832      && !TARGET_APCS_FRAME
2833      && (TARGET_DEFAULT & MASK_APCS_FRAME))
2834    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2835
2836  if (TARGET_APCS_FLOAT)
2837    warning (0, "passing floating point arguments in fp regs not yet supported");
2838
2839  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
2840  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2841  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2842  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2843  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2844  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2845  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2846  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2847  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2848  arm_arch6m = arm_arch6 && !arm_arch_notm;
2849  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2850  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2851  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2852  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2853  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2854
2855  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2856  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2857  thumb_code = TARGET_ARM == 0;
2858  thumb1_code = TARGET_THUMB1 != 0;
2859  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2860  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2861  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2862  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2863  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2864  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2865  arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2866  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2867  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2868  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2869  if (arm_restrict_it == 2)
2870    arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2871
2872  if (!TARGET_THUMB2)
2873    arm_restrict_it = 0;
2874
2875  /* If we are not using the default (ARM mode) section anchor offset
2876     ranges, then set the correct ranges now.  */
2877  if (TARGET_THUMB1)
2878    {
2879      /* Thumb-1 LDR instructions cannot have negative offsets.
2880         Permissible positive offset ranges are 5-bit (for byte loads),
2881         6-bit (for halfword loads), or 7-bit (for word loads).
2882         Empirical results suggest a 7-bit anchor range gives the best
2883         overall code size.  */
2884      targetm.min_anchor_offset = 0;
2885      targetm.max_anchor_offset = 127;
2886    }
2887  else if (TARGET_THUMB2)
2888    {
2889      /* The minimum is set such that the total size of the block
2890         for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2891         divisible by eight, ensuring natural spacing of anchors.  */
2892      targetm.min_anchor_offset = -248;
2893      targetm.max_anchor_offset = 4095;
2894    }
2895
2896  /* V5 code we generate is completely interworking capable, so we turn off
2897     TARGET_INTERWORK here to avoid many tests later on.  */
2898
2899  /* XXX However, we must pass the right pre-processor defines to CPP
2900     or GLD can get confused.  This is a hack.  */
2901  if (TARGET_INTERWORK)
2902    arm_cpp_interwork = 1;
2903
2904  if (arm_arch5)
2905    target_flags &= ~MASK_INTERWORK;
2906
2907  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2908    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2909
2910  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2911    error ("iwmmxt abi requires an iwmmxt capable cpu");
2912
2913  if (!global_options_set.x_arm_fpu_index)
2914    {
2915      const char *target_fpu_name;
2916      bool ok;
2917
2918#ifdef FPUTYPE_DEFAULT
2919      target_fpu_name = FPUTYPE_DEFAULT;
2920#else
2921      target_fpu_name = "vfp";
2922#endif
2923
2924      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2925				  CL_TARGET);
2926      gcc_assert (ok);
2927    }
2928
2929  arm_fpu_desc = &all_fpus[arm_fpu_index];
2930
2931  switch (arm_fpu_desc->model)
2932    {
2933    case ARM_FP_MODEL_VFP:
2934      arm_fpu_attr = FPU_VFP;
2935      break;
2936
2937    default:
2938      gcc_unreachable();
2939    }
2940
2941  if (TARGET_AAPCS_BASED)
2942    {
2943      if (TARGET_CALLER_INTERWORKING)
2944	error ("AAPCS does not support -mcaller-super-interworking");
2945      else
2946	if (TARGET_CALLEE_INTERWORKING)
2947	  error ("AAPCS does not support -mcallee-super-interworking");
2948    }
2949
2950  /* iWMMXt and NEON are incompatible.  */
2951  if (TARGET_IWMMXT && TARGET_NEON)
2952    error ("iWMMXt and NEON are incompatible");
2953
2954  /* iWMMXt unsupported under Thumb mode.  */
2955  if (TARGET_THUMB && TARGET_IWMMXT)
2956    error ("iWMMXt unsupported under Thumb mode");
2957
2958  /* __fp16 support currently assumes the core has ldrh.  */
2959  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2960    sorry ("__fp16 and no ldrh");
2961
2962  /* If soft-float is specified then don't use FPU.  */
2963  if (TARGET_SOFT_FLOAT)
2964    arm_fpu_attr = FPU_NONE;
2965
2966  if (TARGET_AAPCS_BASED)
2967    {
2968      if (arm_abi == ARM_ABI_IWMMXT)
2969	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2970      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2971	       && TARGET_HARD_FLOAT
2972	       && TARGET_VFP)
2973	arm_pcs_default = ARM_PCS_AAPCS_VFP;
2974      else
2975	arm_pcs_default = ARM_PCS_AAPCS;
2976    }
2977  else
2978    {
2979      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2980	sorry ("-mfloat-abi=hard and VFP");
2981
2982      if (arm_abi == ARM_ABI_APCS)
2983	arm_pcs_default = ARM_PCS_APCS;
2984      else
2985	arm_pcs_default = ARM_PCS_ATPCS;
2986    }
2987
2988  /* For arm2/3 there is no need to do any scheduling if we are doing
2989     software floating-point.  */
2990  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2991    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2992
2993  /* Use the cp15 method if it is available.  */
2994  if (target_thread_pointer == TP_AUTO)
2995    {
2996      if (arm_arch6k && !TARGET_THUMB1)
2997	target_thread_pointer = TP_CP15;
2998      else
2999	target_thread_pointer = TP_SOFT;
3000    }
3001
3002  if (TARGET_HARD_TP && TARGET_THUMB1)
3003    error ("can not use -mtp=cp15 with 16-bit Thumb");
3004
3005  /* Override the default structure alignment for AAPCS ABI.  */
3006  if (!global_options_set.x_arm_structure_size_boundary)
3007    {
3008      if (TARGET_AAPCS_BASED)
3009	arm_structure_size_boundary = 8;
3010    }
3011  else
3012    {
3013      if (arm_structure_size_boundary != 8
3014	  && arm_structure_size_boundary != 32
3015	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3016	{
3017	  if (ARM_DOUBLEWORD_ALIGN)
3018	    warning (0,
3019		     "structure size boundary can only be set to 8, 32 or 64");
3020	  else
3021	    warning (0, "structure size boundary can only be set to 8 or 32");
3022	  arm_structure_size_boundary
3023	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3024	}
3025    }
3026
3027  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3028    {
3029      error ("RTP PIC is incompatible with Thumb");
3030      flag_pic = 0;
3031    }
3032
3033  /* If stack checking is disabled, we can use r10 as the PIC register,
3034     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3035  if (flag_pic && TARGET_SINGLE_PIC_BASE)
3036    {
3037      if (TARGET_VXWORKS_RTP)
3038	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3039      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3040    }
3041
3042  if (flag_pic && TARGET_VXWORKS_RTP)
3043    arm_pic_register = 9;
3044
3045  if (arm_pic_register_string != NULL)
3046    {
3047      int pic_register = decode_reg_name (arm_pic_register_string);
3048
3049      if (!flag_pic)
3050	warning (0, "-mpic-register= is useless without -fpic");
3051
3052      /* Prevent the user from choosing an obviously stupid PIC register.  */
3053      else if (pic_register < 0 || call_used_regs[pic_register]
3054	       || pic_register == HARD_FRAME_POINTER_REGNUM
3055	       || pic_register == STACK_POINTER_REGNUM
3056	       || pic_register >= PC_REGNUM
3057	       || (TARGET_VXWORKS_RTP
3058		   && (unsigned int) pic_register != arm_pic_register))
3059	error ("unable to use '%s' for PIC register", arm_pic_register_string);
3060      else
3061	arm_pic_register = pic_register;
3062    }
3063
3064  if (TARGET_VXWORKS_RTP
3065      && !global_options_set.x_arm_pic_data_is_text_relative)
3066    arm_pic_data_is_text_relative = 0;
3067
3068  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3069  if (fix_cm3_ldrd == 2)
3070    {
3071      if (arm_selected_cpu->core == cortexm3)
3072	fix_cm3_ldrd = 1;
3073      else
3074	fix_cm3_ldrd = 0;
3075    }
3076
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */
3085
3086  if (unaligned_access == 2)
3087    {
3088      if (arm_arch6 && (arm_arch_notm || arm_arch7))
3089	unaligned_access = 1;
3090      else
3091	unaligned_access = 0;
3092    }
3093  else if (unaligned_access == 1
3094	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3095    {
3096      warning (0, "target CPU does not support unaligned accesses");
3097      unaligned_access = 0;
3098    }
3099
3100  if (TARGET_THUMB1 && flag_schedule_insns)
3101    {
3102      /* Don't warn since it's on by default in -O2.  */
3103      flag_schedule_insns = 0;
3104    }
3105
3106  if (optimize_size)
3107    {
3108      /* If optimizing for size, bump the number of instructions that we
3109         are prepared to conditionally execute (even on a StrongARM).  */
3110      max_insns_skipped = 6;
3111
3112      /* For THUMB2, we limit the conditional sequence to one IT block.  */
3113      if (TARGET_THUMB2)
3114	max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3115    }
3116  else
3117    max_insns_skipped = current_tune->max_insns_skipped;
3118
3119  /* Hot/Cold partitioning is not currently supported, since we can't
3120     handle literal pool placement in that case.  */
3121  if (flag_reorder_blocks_and_partition)
3122    {
3123      inform (input_location,
3124	      "-freorder-blocks-and-partition not supported on this architecture");
3125      flag_reorder_blocks_and_partition = 0;
3126      flag_reorder_blocks = 1;
3127    }
3128
3129  if (flag_pic)
3130    /* Hoisting PIC address calculations more aggressively provides a small,
3131       but measurable, size reduction for PIC code.  Therefore, we decrease
3132       the bar for unrestricted expression hoisting to the cost of PIC address
3133       calculation, which is 2 instructions.  */
3134    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3135			   global_options.x_param_values,
3136			   global_options_set.x_param_values);
3137
3138  /* ARM EABI defaults to strict volatile bitfields.  */
3139  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3140      && abi_version_at_least(2))
3141    flag_strict_volatile_bitfields = 1;
3142
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and for
     which we have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
3145  if (flag_prefetch_loop_arrays < 0
3146      && HAVE_prefetch
3147      && optimize >= 3
3148      && current_tune->num_prefetch_slots > 0)
3149    flag_prefetch_loop_arrays = 1;
3150
3151  /* Set up parameters to be used in prefetching algorithm.  Do not override the
3152     defaults unless we are tuning for a core we have researched values for.  */
3153  if (current_tune->num_prefetch_slots > 0)
3154    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3155                           current_tune->num_prefetch_slots,
3156                           global_options.x_param_values,
3157                           global_options_set.x_param_values);
3158  if (current_tune->l1_cache_line_size >= 0)
3159    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3160                           current_tune->l1_cache_line_size,
3161                           global_options.x_param_values,
3162                           global_options_set.x_param_values);
3163  if (current_tune->l1_cache_size >= 0)
3164    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3165                           current_tune->l1_cache_size,
3166                           global_options.x_param_values,
3167                           global_options_set.x_param_values);
3168
  /* Use Neon to perform 64-bit operations rather than core
     registers.  */
3171  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3172  if (use_neon_for_64bits == 1)
3173     prefer_neon_for_64bits = true;
3174
3175  /* Use the alternative scheduling-pressure algorithm by default.  */
3176  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3177                         global_options.x_param_values,
3178                         global_options_set.x_param_values);
3179
3180  /* Look through ready list and all of queue for instructions
3181     relevant for L2 auto-prefetcher.  */
3182  int param_sched_autopref_queue_depth;
3183  if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3184    param_sched_autopref_queue_depth = -1;
3185  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3186    param_sched_autopref_queue_depth = 0;
3187  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3188    param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3189  else
3190    gcc_unreachable ();
3191  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3192			 param_sched_autopref_queue_depth,
3193                         global_options.x_param_values,
3194                         global_options_set.x_param_values);
3195
3196  /* Disable shrink-wrap when optimizing function for size, since it tends to
3197     generate additional returns.  */
3198  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3199    flag_shrink_wrap = false;
3200  /* TBD: Dwarf info for apcs frame is not handled yet.  */
3201  if (TARGET_APCS_FRAME)
3202    flag_shrink_wrap = false;
3203
3204  /* We only support -mslow-flash-data on armv7-m targets.  */
3205  if (target_slow_flash_data
3206      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3207	  || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3208    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3209
3210  /* Currently, for slow flash data, we just disable literal pools.  */
3211  if (target_slow_flash_data)
3212    arm_disable_literal_pool = true;
3213
3214  /* Thumb2 inline assembly code should always use unified syntax.
3215     This will apply to ARM and Thumb1 eventually.  */
3216  if (TARGET_THUMB2)
3217    inline_asm_unified = 1;
3218
3219  /* Disable scheduling fusion by default if it's not armv7 processor
3220     or doesn't prefer ldrd/strd.  */
3221  if (flag_schedule_fusion == 2
3222      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3223    flag_schedule_fusion = 0;
3224
3225  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3226     - epilogue_insns - does not accurately model the corresponding insns
3227     emitted in the asm file.  In particular, see the comment in thumb_exit
3228     'Find out how many of the (return) argument registers we can corrupt'.
3229     As a consequence, the epilogue may clobber registers without fipa-ra
3230     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3231     TODO: Accurately model clobbers for epilogue_insns and reenable
3232     fipa-ra.  */
3233  if (TARGET_THUMB1)
3234    flag_ipa_ra = 0;
3235
3236  /* Register global variables with the garbage collector.  */
3237  arm_add_gc_roots ();
3238}
3239
3240static void
3241arm_add_gc_roots (void)
3242{
3243  gcc_obstack_init(&minipool_obstack);
3244  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3245}
3246
3247/* A table of known ARM exception types.
3248   For use with the interrupt function attribute.  */
3249
3250typedef struct
3251{
3252  const char *const arg;
3253  const unsigned long return_value;
3254}
3255isr_attribute_arg;
3256
3257static const isr_attribute_arg isr_attribute_args [] =
3258{
3259  { "IRQ",   ARM_FT_ISR },
3260  { "irq",   ARM_FT_ISR },
3261  { "FIQ",   ARM_FT_FIQ },
3262  { "fiq",   ARM_FT_FIQ },
3263  { "ABORT", ARM_FT_ISR },
3264  { "abort", ARM_FT_ISR },
3265  { "ABORT", ARM_FT_ISR },
3266  { "abort", ARM_FT_ISR },
3267  { "UNDEF", ARM_FT_EXCEPTION },
3268  { "undef", ARM_FT_EXCEPTION },
3269  { "SWI",   ARM_FT_EXCEPTION },
3270  { "swi",   ARM_FT_EXCEPTION },
3271  { NULL,    ARM_FT_NORMAL }
3272};
3273
3274/* Returns the (interrupt) function type of the current
3275   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3276
3277static unsigned long
3278arm_isr_value (tree argument)
3279{
3280  const isr_attribute_arg * ptr;
3281  const char *              arg;
3282
3283  if (!arm_arch_notm)
3284    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3285
3286  /* No argument - default to IRQ.  */
3287  if (argument == NULL_TREE)
3288    return ARM_FT_ISR;
3289
3290  /* Get the value of the argument.  */
3291  if (TREE_VALUE (argument) == NULL_TREE
3292      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3293    return ARM_FT_UNKNOWN;
3294
3295  arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3296
3297  /* Check it against the list of known arguments.  */
3298  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3299    if (streq (arg, ptr->arg))
3300      return ptr->return_value;
3301
3302  /* An unrecognized interrupt type.  */
3303  return ARM_FT_UNKNOWN;
3304}
3305
3306/* Computes the type of the current function.  */
3307
3308static unsigned long
3309arm_compute_func_type (void)
3310{
3311  unsigned long type = ARM_FT_UNKNOWN;
3312  tree a;
3313  tree attr;
3314
3315  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3316
3317  /* Decide if the current function is volatile.  Such functions
3318     never return, and many memory cycles can be saved by not storing
3319     register values that will never be needed again.  This optimization
3320     was added to speed up context switching in a kernel application.  */
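  /* (Illustrative note: on a FUNCTION_DECL, TREE_THIS_VOLATILE corresponds
     to the noreturn attribute, so a helper declared as
       void fatal (const char *) __attribute__ ((noreturn));
     is a typical candidate for ARM_FT_VOLATILE.)  */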
3321  if (optimize > 0
3322      && (TREE_NOTHROW (current_function_decl)
3323          || !(flag_unwind_tables
3324               || (flag_exceptions
3325		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3326      && TREE_THIS_VOLATILE (current_function_decl))
3327    type |= ARM_FT_VOLATILE;
3328
3329  if (cfun->static_chain_decl != NULL)
3330    type |= ARM_FT_NESTED;
3331
3332  attr = DECL_ATTRIBUTES (current_function_decl);
3333
3334  a = lookup_attribute ("naked", attr);
3335  if (a != NULL_TREE)
3336    type |= ARM_FT_NAKED;
3337
3338  a = lookup_attribute ("isr", attr);
3339  if (a == NULL_TREE)
3340    a = lookup_attribute ("interrupt", attr);
3341
3342  if (a == NULL_TREE)
3343    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3344  else
3345    type |= arm_isr_value (TREE_VALUE (a));
3346
3347  return type;
3348}
3349
3350/* Returns the type of the current function.  */
3351
3352unsigned long
3353arm_current_func_type (void)
3354{
3355  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3356    cfun->machine->func_type = arm_compute_func_type ();
3357
3358  return cfun->machine->func_type;
3359}
3360
3361bool
3362arm_allocate_stack_slots_for_args (void)
3363{
3364  /* Naked functions should not allocate stack slots for arguments.  */
3365  return !IS_NAKED (arm_current_func_type ());
3366}
3367
3368static bool
3369arm_warn_func_return (tree decl)
3370{
3371  /* Naked functions are implemented entirely in assembly, including the
3372     return sequence, so suppress warnings about this.  */
3373  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3374}
3375
3376
3377/* Output assembler code for a block containing the constant parts
3378   of a trampoline, leaving space for the variable parts.
3379
3380   On the ARM, (if r8 is the static chain regnum, and remembering that
3381   referencing pc adds an offset of 8) the trampoline looks like:
3382	   ldr 		r8, [pc, #0]
3383	   ldr		pc, [pc]
3384	   .word	static chain value
3385	   .word	function's address
3386   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
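
/* With the 32-bit templates below, arm_trampoline_init later stores the
   static chain value at offset 8 and the target function's address at
   offset 12, so an initialized trampoline is laid out roughly as follows
   (a sketch, not generated output):

	offset  0:	first instruction of the template
	offset  4:	second instruction of the template
	offset  8:	.word	static chain value
	offset 12:	.word	function's address  */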
3387
3388static void
3389arm_asm_trampoline_template (FILE *f)
3390{
3391  if (TARGET_ARM)
3392    {
3393      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3394      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3395    }
3396  else if (TARGET_THUMB2)
3397    {
3398      /* The Thumb-2 trampoline is similar to the arm implementation.
3399	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
3400      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3401		   STATIC_CHAIN_REGNUM, PC_REGNUM);
3402      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3403    }
3404  else
3405    {
3406      ASM_OUTPUT_ALIGN (f, 2);
3407      fprintf (f, "\t.code\t16\n");
3408      fprintf (f, ".Ltrampoline_start:\n");
3409      asm_fprintf (f, "\tpush\t{r0, r1}\n");
3410      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3411      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3412      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3413      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3414      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3415    }
3416  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3417  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3418}
3419
3420/* Emit RTL insns to initialize the variable parts of a trampoline.  */
3421
3422static void
3423arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3424{
3425  rtx fnaddr, mem, a_tramp;
3426
3427  emit_block_move (m_tramp, assemble_trampoline_template (),
3428		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3429
3430  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3431  emit_move_insn (mem, chain_value);
3432
3433  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3434  fnaddr = XEXP (DECL_RTL (fndecl), 0);
3435  emit_move_insn (mem, fnaddr);
3436
3437  a_tramp = XEXP (m_tramp, 0);
3438  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3439		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3440		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3441}
3442
3443/* Thumb trampolines should be entered in thumb mode, so set
3444   the bottom bit of the address.  */
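/* For instance (illustrative), a trampoline placed at 0x20000100 is entered
   through the address 0x20000101, so a BX/BLX to it switches to Thumb
   state.  */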
3445
3446static rtx
3447arm_trampoline_adjust_address (rtx addr)
3448{
3449  if (TARGET_THUMB)
3450    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3451				NULL, 0, OPTAB_LIB_WIDEN);
3452  return addr;
3453}
3454
3455/* Return 1 if it is possible to return using a single instruction.
3456   If SIBLING is non-null, this is a test for a return before a sibling
3457   call.  SIBLING is the call insn, so we can examine its register usage.  */
3458
3459int
3460use_return_insn (int iscond, rtx sibling)
3461{
3462  int regno;
3463  unsigned int func_type;
3464  unsigned long saved_int_regs;
3465  unsigned HOST_WIDE_INT stack_adjust;
3466  arm_stack_offsets *offsets;
3467
3468  /* Never use a return instruction before reload has run.  */
3469  if (!reload_completed)
3470    return 0;
3471
3472  func_type = arm_current_func_type ();
3473
3474  /* Naked, volatile and stack alignment functions need special
3475     consideration.  */
3476  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3477    return 0;
3478
3479  /* So do interrupt functions that use the frame pointer and Thumb
3480     interrupt functions.  */
3481  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3482    return 0;
3483
3484  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3485      && !optimize_function_for_size_p (cfun))
3486    return 0;
3487
3488  offsets = arm_get_frame_offsets ();
3489  stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3490
3491  /* As do variadic functions.  */
3492  if (crtl->args.pretend_args_size
3493      || cfun->machine->uses_anonymous_args
3494      /* Or if the function calls __builtin_eh_return () */
3495      || crtl->calls_eh_return
3496      /* Or if the function calls alloca */
3497      || cfun->calls_alloca
3498      /* Or if there is a stack adjustment.  However, if the stack pointer
3499	 is saved on the stack, we can use a pre-incrementing stack load.  */
3500      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3501				 && stack_adjust == 4)))
3502    return 0;
3503
3504  saved_int_regs = offsets->saved_regs_mask;
3505
3506  /* Unfortunately, the insn
3507
3508       ldmib sp, {..., sp, ...}
3509
3510     triggers a bug on most SA-110 based devices, such that the stack
3511     pointer won't be correctly restored if the instruction takes a
3512     page fault.  We work around this problem by popping r3 along with
3513     the other registers, since that is never slower than executing
3514     another instruction.
3515
3516     We test for !arm_arch5 here, because code for any architecture
3517     less than this could potentially be run on one of the buggy
3518     chips.  */
3519  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3520    {
3521      /* Validate that r3 is a call-clobbered register (always true in
3522	 the default abi) ...  */
3523      if (!call_used_regs[3])
3524	return 0;
3525
3526      /* ... that it isn't being used for a return value ... */
3527      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3528	return 0;
3529
3530      /* ... or for a tail-call argument ...  */
3531      if (sibling)
3532	{
3533	  gcc_assert (CALL_P (sibling));
3534
3535	  if (find_regno_fusage (sibling, USE, 3))
3536	    return 0;
3537	}
3538
3539      /* ... and that there are no call-saved registers in r0-r2
3540	 (always true in the default ABI).  */
3541      if (saved_int_regs & 0x7)
3542	return 0;
3543    }
3544
3545  /* Can't be done if interworking with Thumb, and any registers have been
3546     stacked.  */
3547  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3548    return 0;
3549
3550  /* On StrongARM, conditional returns are expensive if they aren't
3551     taken and multiple registers have been stacked.  */
3552  if (iscond && arm_tune_strongarm)
3553    {
3554      /* Conditional return when just the LR is stored is a simple
3555	 conditional-load instruction, that's not expensive.  */
3556      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3557	return 0;
3558
3559      if (flag_pic
3560	  && arm_pic_register != INVALID_REGNUM
3561	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3562	return 0;
3563    }
3564
3565  /* If there are saved registers but the LR isn't saved, then we need
3566     two instructions for the return.  */
3567  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3568    return 0;
3569
3570  /* Can't be done if any of the VFP regs are pushed,
3571     since this also requires an insn.  */
3572  if (TARGET_HARD_FLOAT && TARGET_VFP)
3573    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3574      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3575	return 0;
3576
3577  if (TARGET_REALLY_IWMMXT)
3578    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3579      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3580	return 0;
3581
3582  return 1;
3583}
3584
3585/* Return TRUE if we should try to use a simple_return insn, i.e. perform
3586   shrink-wrapping if possible.  This is the case if we need to emit a
3587   prologue, which we can test by looking at the offsets.  */
3588bool
3589use_simple_return_p (void)
3590{
3591  arm_stack_offsets *offsets;
3592
3593  offsets = arm_get_frame_offsets ();
3594  return offsets->outgoing_args != 0;
3595}
3596
3597/* Return TRUE if int I is a valid immediate ARM constant.  */
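/* A few illustrative cases: 0x000000ff, 0x0000ff00 and 0xff000000 are all
   valid (an 8-bit value rotated by an even amount), whereas 0x00000fff and
   0x00102030 are not and must be synthesized from several instructions.  */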
3598
3599int
3600const_ok_for_arm (HOST_WIDE_INT i)
3601{
3602  int lowbit;
3603
3604  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3605     be all zero, or all one.  */
3606  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3607      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3608	  != ((~(unsigned HOST_WIDE_INT) 0)
3609	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3610    return FALSE;
3611
3612  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3613
3614  /* Fast return for 0 and small values.  We must do this for zero, since
3615     the code below can't handle that one case.  */
3616  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3617    return TRUE;
3618
3619  /* Get the number of trailing zeros.  */
3620  lowbit = ffs((int) i) - 1;
3621
3622  /* Only even shifts are allowed in ARM mode so round down to the
3623     nearest even number.  */
3624  if (TARGET_ARM)
3625    lowbit &= ~1;
3626
3627  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3628    return TRUE;
3629
3630  if (TARGET_ARM)
3631    {
3632      /* Allow rotated constants in ARM mode.  */
3633      if (lowbit <= 4
3634	   && ((i & ~0xc000003f) == 0
3635	       || (i & ~0xf000000f) == 0
3636	       || (i & ~0xfc000003) == 0))
3637	return TRUE;
3638    }
3639  else
3640    {
3641      HOST_WIDE_INT v;
3642
3643      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
3644      v = i & 0xff;
3645      v |= v << 16;
3646      if (i == v || i == (v | (v << 8)))
3647	return TRUE;
3648
3649      /* Allow repeated pattern 0xXY00XY00.  */
3650      v = i & 0xff00;
3651      v |= v << 16;
3652      if (i == v)
3653	return TRUE;
3654    }
3655
3656  return FALSE;
3657}
3658
3659/* Return true if I is a valid constant for the operation CODE.  */
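/* For example (a sketch): for SET, 0xffffff00 is accepted because its
   bitwise complement 0x000000ff is a valid immediate and can be loaded
   with MVN; for PLUS, -200 is accepted because the negated value 200 can
   be used in a subtract.  */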
3660int
3661const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3662{
3663  if (const_ok_for_arm (i))
3664    return 1;
3665
3666  switch (code)
3667    {
3668    case SET:
3669      /* See if we can use movw.  */
3670      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3671	return 1;
3672      else
3673	/* Otherwise, try mvn.  */
3674	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3675
3676    case PLUS:
3677      /* See if we can use addw or subw.  */
3678      if (TARGET_THUMB2
3679	  && ((i & 0xfffff000) == 0
3680	      || ((-i) & 0xfffff000) == 0))
3681	return 1;
3682      /* else fall through.  */
3683
3684    case COMPARE:
3685    case EQ:
3686    case NE:
3687    case GT:
3688    case LE:
3689    case LT:
3690    case GE:
3691    case GEU:
3692    case LTU:
3693    case GTU:
3694    case LEU:
3695    case UNORDERED:
3696    case ORDERED:
3697    case UNEQ:
3698    case UNGE:
3699    case UNLT:
3700    case UNGT:
3701    case UNLE:
3702      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3703
3704    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
3705    case XOR:
3706      return 0;
3707
3708    case IOR:
3709      if (TARGET_THUMB2)
3710	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3711      return 0;
3712
3713    case AND:
3714      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3715
3716    default:
3717      gcc_unreachable ();
3718    }
3719}
3720
3721/* Return true if I is a valid di mode constant for the operation CODE.  */
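/* For instance (illustrative), the DImode mask 0x00000000ffffffff is
   accepted for AND because each 32-bit half is either a valid immediate
   or all ones.  */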
3722int
3723const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3724{
3725  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3726  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3727  rtx hi = GEN_INT (hi_val);
3728  rtx lo = GEN_INT (lo_val);
3729
3730  if (TARGET_THUMB1)
3731    return 0;
3732
3733  switch (code)
3734    {
3735    case AND:
3736    case IOR:
3737    case XOR:
3738      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3739              && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3740    case PLUS:
3741      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3742
3743    default:
3744      return 0;
3745    }
3746}
3747
3748/* Emit a sequence of insns to handle a large constant.
3749   CODE is the code of the operation required, it can be any of SET, PLUS,
3750   IOR, AND, XOR, MINUS;
3751   MODE is the mode in which the operation is being performed;
3752   VAL is the integer to operate on;
3753   SOURCE is the other operand (a register, or a null-pointer for SET);
3754   SUBTARGETS means it is safe to create scratch registers if that will
3755   either produce a simpler sequence, or we will want to cse the values.
3756   Return value is the number of insns emitted.  */
3757
3758/* ??? Tweak this for thumb2.  */
3759int
3760arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3761		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3762{
3763  rtx cond;
3764
3765  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3766    cond = COND_EXEC_TEST (PATTERN (insn));
3767  else
3768    cond = NULL_RTX;
3769
3770  if (subtargets || code == SET
3771      || (REG_P (target) && REG_P (source)
3772	  && REGNO (target) != REGNO (source)))
3773    {
3774      /* After arm_reorg has been called, we can't fix up expensive
3775	 constants by pushing them into memory so we must synthesize
3776	 them in-line, regardless of the cost.  This is only likely to
3777	 be more costly on chips that have load delay slots and we are
3778	 compiling without running the scheduler (so no splitting
3779	 occurred before the final instruction emission).
3780
3781	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3782      */
3783      if (!cfun->machine->after_arm_reorg
3784	  && !cond
3785	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3786				1, 0)
3787	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
3788		 + (code != SET))))
3789	{
3790	  if (code == SET)
3791	    {
	      /* Currently SET is the only monadic value for CODE; all
		 the rest are dyadic.  */
3794	      if (TARGET_USE_MOVT)
3795		arm_emit_movpair (target, GEN_INT (val));
3796	      else
3797		emit_set_insn (target, GEN_INT (val));
3798
3799	      return 1;
3800	    }
3801	  else
3802	    {
3803	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3804
3805	      if (TARGET_USE_MOVT)
3806		arm_emit_movpair (temp, GEN_INT (val));
3807	      else
3808		emit_set_insn (temp, GEN_INT (val));
3809
3810	      /* For MINUS, the value is subtracted from, since we never
3811		 have subtraction of a constant.  */
3812	      if (code == MINUS)
3813		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3814	      else
3815		emit_set_insn (target,
3816			       gen_rtx_fmt_ee (code, mode, source, temp));
3817	      return 2;
3818	    }
3819	}
3820    }
3821
3822  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3823			   1);
3824}
3825
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/THUMB2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
3829static int
3830optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3831			    struct four_ints *return_sequence)
3832{
3833  int best_consecutive_zeros = 0;
3834  int i;
3835  int best_start = 0;
3836  int insns1, insns2;
3837  struct four_ints tmp_sequence;
3838
3839  /* If we aren't targeting ARM, the best place to start is always at
3840     the bottom, otherwise look more closely.  */
3841  if (TARGET_ARM)
3842    {
3843      for (i = 0; i < 32; i += 2)
3844	{
3845	  int consecutive_zeros = 0;
3846
3847	  if (!(val & (3 << i)))
3848	    {
3849	      while ((i < 32) && !(val & (3 << i)))
3850		{
3851		  consecutive_zeros += 2;
3852		  i += 2;
3853		}
3854	      if (consecutive_zeros > best_consecutive_zeros)
3855		{
3856		  best_consecutive_zeros = consecutive_zeros;
3857		  best_start = i - consecutive_zeros;
3858		}
3859	      i -= 2;
3860	    }
3861	}
3862    }
3863
3864  /* So long as it won't require any more insns to do so, it's
3865     desirable to emit a small constant (in bits 0...9) in the last
3866     insn.  This way there is more chance that it can be combined with
3867     a later addressing insn to form a pre-indexed load or store
3868     operation.  Consider:
3869
3870	   *((volatile int *)0xe0000100) = 1;
3871	   *((volatile int *)0xe0000110) = 2;
3872
3873     We want this to wind up as:
3874
3875	    mov rA, #0xe0000000
3876	    mov rB, #1
3877	    str rB, [rA, #0x100]
3878	    mov rB, #2
3879	    str rB, [rA, #0x110]
3880
3881     rather than having to synthesize both large constants from scratch.
3882
3883     Therefore, we calculate how many insns would be required to emit
3884     the constant starting from `best_start', and also starting from
3885     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
3886     yield a shorter sequence, we may as well use zero.  */
3887  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3888  if (best_start != 0
3889      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3890    {
3891      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3892      if (insns2 <= insns1)
3893	{
3894	  *return_sequence = tmp_sequence;
3895	  insns1 = insns2;
3896	}
3897    }
3898
3899  return insns1;
3900}
3901
3902/* As for optimal_immediate_sequence, but starting at bit-position I.  */
3903static int
3904optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3905			     struct four_ints *return_sequence, int i)
3906{
3907  int remainder = val & 0xffffffff;
3908  int insns = 0;
3909
3910  /* Try and find a way of doing the job in either two or three
3911     instructions.
3912
3913     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3914     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
3916     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3917     wrapping around to the top of the word when we drop off the bottom.
3918     In the worst case this code should produce no more than four insns.
3919
3920     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3921     constants, shifted to any arbitrary location.  We should always start
3922     at the MSB.  */
3923  do
3924    {
3925      int end;
3926      unsigned int b1, b2, b3, b4;
3927      unsigned HOST_WIDE_INT result;
3928      int loc;
3929
3930      gcc_assert (insns < 4);
3931
3932      if (i <= 0)
3933	i += 32;
3934
3935      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
3936      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3937	{
3938	  loc = i;
3939	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3940	    /* We can use addw/subw for the last 12 bits.  */
3941	    result = remainder;
3942	  else
3943	    {
3944	      /* Use an 8-bit shifted/rotated immediate.  */
3945	      end = i - 8;
3946	      if (end < 0)
3947		end += 32;
3948	      result = remainder & ((0x0ff << end)
3949				   | ((i < end) ? (0xff >> (32 - end))
3950						: 0));
3951	      i -= 8;
3952	    }
3953	}
3954      else
3955	{
3956	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
3957	     arbitrary shifts.  */
3958	  i -= TARGET_ARM ? 2 : 1;
3959	  continue;
3960	}
3961
3962      /* Next, see if we can do a better job with a thumb2 replicated
3963	 constant.
3964
3965         We do it this way around to catch the cases like 0x01F001E0 where
3966	 two 8-bit immediates would work, but a replicated constant would
3967	 make it worse.
3968
3969         TODO: 16-bit constants that don't clear all the bits, but still win.
3970         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
3971      if (TARGET_THUMB2)
3972	{
3973	  b1 = (remainder & 0xff000000) >> 24;
3974	  b2 = (remainder & 0x00ff0000) >> 16;
3975	  b3 = (remainder & 0x0000ff00) >> 8;
3976	  b4 = remainder & 0xff;
3977
3978	  if (loc > 24)
3979	    {
3980	      /* The 8-bit immediate already found clears b1 (and maybe b2),
3981		 but must leave b3 and b4 alone.  */
3982
3983	      /* First try to find a 32-bit replicated constant that clears
3984		 almost everything.  We can assume that we can't do it in one,
3985		 or else we wouldn't be here.  */
3986	      unsigned int tmp = b1 & b2 & b3 & b4;
3987	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3988				  + (tmp << 24);
3989	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3990					    + (tmp == b3) + (tmp == b4);
3991	      if (tmp
3992		  && (matching_bytes >= 3
3993		      || (matching_bytes == 2
3994			  && const_ok_for_op (remainder & ~tmp2, code))))
3995		{
3996		  /* At least 3 of the bytes match, and the fourth has at
3997		     least as many bits set, or two of the bytes match
3998		     and it will only require one more insn to finish.  */
3999		  result = tmp2;
4000		  i = tmp != b1 ? 32
4001		      : tmp != b2 ? 24
4002		      : tmp != b3 ? 16
4003		      : 8;
4004		}
4005
4006	      /* Second, try to find a 16-bit replicated constant that can
4007		 leave three of the bytes clear.  If b2 or b4 is already
4008		 zero, then we can.  If the 8-bit from above would not
4009		 clear b2 anyway, then we still win.  */
4010	      else if (b1 == b3 && (!b2 || !b4
4011			       || (remainder & 0x00ff0000 & ~result)))
4012		{
4013		  result = remainder & 0xff00ff00;
4014		  i = 24;
4015		}
4016	    }
4017	  else if (loc > 16)
4018	    {
4019	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
4021		 leave b4 unchanged.  */
4022
4023	      /* If we can clear b2 and b4 at once, then we win, since the
4024		 8-bits couldn't possibly reach that far.  */
4025	      if (b2 == b4)
4026		{
4027		  result = remainder & 0x00ff00ff;
4028		  i = 16;
4029		}
4030	    }
4031	}
4032
4033      return_sequence->i[insns++] = result;
4034      remainder &= ~result;
4035
4036      if (code == SET || code == MINUS)
4037	code = PLUS;
4038    }
4039  while (remainder);
4040
4041  return insns;
4042}
4043
4044/* Emit an instruction with the indicated PATTERN.  If COND is
4045   non-NULL, conditionalize the execution of the instruction on COND
4046   being true.  */
4047
4048static void
4049emit_constant_insn (rtx cond, rtx pattern)
4050{
4051  if (cond)
4052    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4053  emit_insn (pattern);
4054}
4055
4056/* As above, but extra parameter GENERATE which, if clear, suppresses
4057   RTL generation.  */
4058
4059static int
4060arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4061		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4062		  int generate)
4063{
4064  int can_invert = 0;
4065  int can_negate = 0;
4066  int final_invert = 0;
4067  int i;
4068  int set_sign_bit_copies = 0;
4069  int clear_sign_bit_copies = 0;
4070  int clear_zero_bit_copies = 0;
4071  int set_zero_bit_copies = 0;
4072  int insns = 0, neg_insns, inv_insns;
4073  unsigned HOST_WIDE_INT temp1, temp2;
4074  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4075  struct four_ints *immediates;
4076  struct four_ints pos_immediates, neg_immediates, inv_immediates;
4077
4078  /* Find out which operations are safe for a given CODE.  Also do a quick
4079     check for degenerate cases; these can occur when DImode operations
4080     are split.  */
4081  switch (code)
4082    {
4083    case SET:
4084      can_invert = 1;
4085      break;
4086
4087    case PLUS:
4088      can_negate = 1;
4089      break;
4090
4091    case IOR:
4092      if (remainder == 0xffffffff)
4093	{
4094	  if (generate)
4095	    emit_constant_insn (cond,
4096				gen_rtx_SET (VOIDmode, target,
4097					     GEN_INT (ARM_SIGN_EXTEND (val))));
4098	  return 1;
4099	}
4100
4101      if (remainder == 0)
4102	{
4103	  if (reload_completed && rtx_equal_p (target, source))
4104	    return 0;
4105
4106	  if (generate)
4107	    emit_constant_insn (cond,
4108				gen_rtx_SET (VOIDmode, target, source));
4109	  return 1;
4110	}
4111      break;
4112
4113    case AND:
4114      if (remainder == 0)
4115	{
4116	  if (generate)
4117	    emit_constant_insn (cond,
4118				gen_rtx_SET (VOIDmode, target, const0_rtx));
4119	  return 1;
4120	}
4121      if (remainder == 0xffffffff)
4122	{
4123	  if (reload_completed && rtx_equal_p (target, source))
4124	    return 0;
4125	  if (generate)
4126	    emit_constant_insn (cond,
4127				gen_rtx_SET (VOIDmode, target, source));
4128	  return 1;
4129	}
4130      can_invert = 1;
4131      break;
4132
4133    case XOR:
4134      if (remainder == 0)
4135	{
4136	  if (reload_completed && rtx_equal_p (target, source))
4137	    return 0;
4138	  if (generate)
4139	    emit_constant_insn (cond,
4140				gen_rtx_SET (VOIDmode, target, source));
4141	  return 1;
4142	}
4143
4144      if (remainder == 0xffffffff)
4145	{
4146	  if (generate)
4147	    emit_constant_insn (cond,
4148				gen_rtx_SET (VOIDmode, target,
4149					     gen_rtx_NOT (mode, source)));
4150	  return 1;
4151	}
4152      final_invert = 1;
4153      break;
4154
4155    case MINUS:
4156      /* We treat MINUS as (val - source), since (source - val) is always
4157	 passed as (source + (-val)).  */
4158      if (remainder == 0)
4159	{
4160	  if (generate)
4161	    emit_constant_insn (cond,
4162				gen_rtx_SET (VOIDmode, target,
4163					     gen_rtx_NEG (mode, source)));
4164	  return 1;
4165	}
4166      if (const_ok_for_arm (val))
4167	{
4168	  if (generate)
4169	    emit_constant_insn (cond,
4170				gen_rtx_SET (VOIDmode, target,
4171					     gen_rtx_MINUS (mode, GEN_INT (val),
4172							    source)));
4173	  return 1;
4174	}
4175
4176      break;
4177
4178    default:
4179      gcc_unreachable ();
4180    }
4181
4182  /* If we can do it in one insn get out quickly.  */
4183  if (const_ok_for_op (val, code))
4184    {
4185      if (generate)
4186	emit_constant_insn (cond,
4187			    gen_rtx_SET (VOIDmode, target,
4188					 (source
4189					  ? gen_rtx_fmt_ee (code, mode, source,
4190							    GEN_INT (val))
4191					  : GEN_INT (val))));
4192      return 1;
4193    }
4194
4195  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4196     insn.  */
4197  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4198      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4199    {
4200      if (generate)
4201	{
4202	  if (mode == SImode && i == 16)
4203	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4204	       smaller insn.  */
4205	    emit_constant_insn (cond,
4206				gen_zero_extendhisi2
4207				(target, gen_lowpart (HImode, source)));
4208	  else
4209	    /* Extz only supports SImode, but we can coerce the operands
4210	       into that mode.  */
4211	    emit_constant_insn (cond,
4212				gen_extzv_t2 (gen_lowpart (SImode, target),
4213					      gen_lowpart (SImode, source),
4214					      GEN_INT (i), const0_rtx));
4215	}
4216
4217      return 1;
4218    }
4219
4220  /* Calculate a few attributes that may be useful for specific
4221     optimizations.  */
4222  /* Count number of leading zeros.  */
4223  for (i = 31; i >= 0; i--)
4224    {
4225      if ((remainder & (1 << i)) == 0)
4226	clear_sign_bit_copies++;
4227      else
4228	break;
4229    }
4230
4231  /* Count number of leading 1's.  */
4232  for (i = 31; i >= 0; i--)
4233    {
4234      if ((remainder & (1 << i)) != 0)
4235	set_sign_bit_copies++;
4236      else
4237	break;
4238    }
4239
  /* Count number of trailing zeros.  */
4241  for (i = 0; i <= 31; i++)
4242    {
4243      if ((remainder & (1 << i)) == 0)
4244	clear_zero_bit_copies++;
4245      else
4246	break;
4247    }
4248
4249  /* Count number of trailing 1's.  */
4250  for (i = 0; i <= 31; i++)
4251    {
4252      if ((remainder & (1 << i)) != 0)
4253	set_zero_bit_copies++;
4254      else
4255	break;
4256    }
4257
4258  switch (code)
4259    {
4260    case SET:
4261      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
4263	 may well merge into a subsequent insn.  */
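      /* Illustrative example: 0xffffff80 has 25 leading ones; shifting it
	 left by 24 gives 0x80000000, which is a valid immediate, so it can
	 be formed as a MOV of 0x80000000 followed by an arithmetic shift
	 right by 24.  */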
4264      if (set_sign_bit_copies > 1)
4265	{
4266	  if (const_ok_for_arm
4267	      (temp1 = ARM_SIGN_EXTEND (remainder
4268					<< (set_sign_bit_copies - 1))))
4269	    {
4270	      if (generate)
4271		{
4272		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4273		  emit_constant_insn (cond,
4274				      gen_rtx_SET (VOIDmode, new_src,
4275						   GEN_INT (temp1)));
4276		  emit_constant_insn (cond,
4277				      gen_ashrsi3 (target, new_src,
4278						   GEN_INT (set_sign_bit_copies - 1)));
4279		}
4280	      return 2;
4281	    }
4282	  /* For an inverted constant, we will need to set the low bits,
4283	     these will be shifted out of harm's way.  */
4284	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4285	  if (const_ok_for_arm (~temp1))
4286	    {
4287	      if (generate)
4288		{
4289		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4290		  emit_constant_insn (cond,
4291				      gen_rtx_SET (VOIDmode, new_src,
4292						   GEN_INT (temp1)));
4293		  emit_constant_insn (cond,
4294				      gen_ashrsi3 (target, new_src,
4295						   GEN_INT (set_sign_bit_copies - 1)));
4296		}
4297	      return 2;
4298	    }
4299	}
4300
4301      /* See if we can calculate the value as the difference between two
4302	 valid immediates.  */
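      /* E.g. (illustrative) 0x000fffff is not a valid immediate, but it is
	 the difference 0x00100000 - 1 of two valid immediates, so it can be
	 built with a MOV followed by a subtract of 1.  */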
4303      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4304	{
4305	  int topshift = clear_sign_bit_copies & ~1;
4306
4307	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4308				   & (0xff000000 >> topshift));
4309
4310	  /* If temp1 is zero, then that means the 9 most significant
4311	     bits of remainder were 1 and we've caused it to overflow.
4312	     When topshift is 0 we don't need to do anything since we
4313	     can borrow from 'bit 32'.  */
4314	  if (temp1 == 0 && topshift != 0)
4315	    temp1 = 0x80000000 >> (topshift - 1);
4316
4317	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4318
4319	  if (const_ok_for_arm (temp2))
4320	    {
4321	      if (generate)
4322		{
4323		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4324		  emit_constant_insn (cond,
4325				      gen_rtx_SET (VOIDmode, new_src,
4326						   GEN_INT (temp1)));
4327		  emit_constant_insn (cond,
4328				      gen_addsi3 (target, new_src,
4329						  GEN_INT (-temp2)));
4330		}
4331
4332	      return 2;
4333	    }
4334	}
4335
4336      /* See if we can generate this by setting the bottom (or the top)
4337	 16 bits, and then shifting these into the other half of the
4338	 word.  We only look for the simplest cases, to do more would cost
4339	 too much.  Be careful, however, not to generate this when the
4340	 alternative would take fewer insns.  */
4341      if (val & 0xffff0000)
4342	{
4343	  temp1 = remainder & 0xffff0000;
4344	  temp2 = remainder & 0x0000ffff;
4345
4346	  /* Overlaps outside this range are best done using other methods.  */
4347	  for (i = 9; i < 24; i++)
4348	    {
4349	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4350		  && !const_ok_for_arm (temp2))
4351		{
4352		  rtx new_src = (subtargets
4353				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4354				 : target);
4355		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4356					    source, subtargets, generate);
4357		  source = new_src;
4358		  if (generate)
4359		    emit_constant_insn
4360		      (cond,
4361		       gen_rtx_SET
4362		       (VOIDmode, target,
4363			gen_rtx_IOR (mode,
4364				     gen_rtx_ASHIFT (mode, source,
4365						     GEN_INT (i)),
4366				     source)));
4367		  return insns + 1;
4368		}
4369	    }
4370
4371	  /* Don't duplicate cases already considered.  */
4372	  for (i = 17; i < 24; i++)
4373	    {
4374	      if (((temp1 | (temp1 >> i)) == remainder)
4375		  && !const_ok_for_arm (temp1))
4376		{
4377		  rtx new_src = (subtargets
4378				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4379				 : target);
4380		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4381					    source, subtargets, generate);
4382		  source = new_src;
4383		  if (generate)
4384		    emit_constant_insn
4385		      (cond,
4386		       gen_rtx_SET (VOIDmode, target,
4387				    gen_rtx_IOR
4388				    (mode,
4389				     gen_rtx_LSHIFTRT (mode, source,
4390						       GEN_INT (i)),
4391				     source)));
4392		  return insns + 1;
4393		}
4394	    }
4395	}
4396      break;
4397
4398    case IOR:
4399    case XOR:
4400      /* If we have IOR or XOR, and the constant can be loaded in a
4401	 single instruction, and we can find a temporary to put it in,
4402	 then this can be done in two instructions instead of 3-4.  */
4403      if (subtargets
4404	  /* TARGET can't be NULL if SUBTARGETS is 0 */
4405	  || (reload_completed && !reg_mentioned_p (target, source)))
4406	{
4407	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4408	    {
4409	      if (generate)
4410		{
4411		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4412
4413		  emit_constant_insn (cond,
4414				      gen_rtx_SET (VOIDmode, sub,
4415						   GEN_INT (val)));
4416		  emit_constant_insn (cond,
4417				      gen_rtx_SET (VOIDmode, target,
4418						   gen_rtx_fmt_ee (code, mode,
4419								   source, sub)));
4420		}
4421	      return 2;
4422	    }
4423	}
4424
4425      if (code == XOR)
4426	break;
4427
      /*  Convert
	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
	                    and the remaining bits 0, e.g. 0xfff00000)
	  to
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4432
4433	  This can be done in 2 instructions by using shifts with mov or mvn.
4434	  e.g. for
4435	  x = x | 0xfff00000;
4436	  we generate.
4437	  mvn	r0, r0, asl #12
4438	  mvn	r0, r0, lsr #12  */
4439      if (set_sign_bit_copies > 8
4440	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4441	{
4442	  if (generate)
4443	    {
4444	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4445	      rtx shift = GEN_INT (set_sign_bit_copies);
4446
4447	      emit_constant_insn
4448		(cond,
4449		 gen_rtx_SET (VOIDmode, sub,
4450			      gen_rtx_NOT (mode,
4451					   gen_rtx_ASHIFT (mode,
4452							   source,
4453							   shift))));
4454	      emit_constant_insn
4455		(cond,
4456		 gen_rtx_SET (VOIDmode, target,
4457			      gen_rtx_NOT (mode,
4458					   gen_rtx_LSHIFTRT (mode, sub,
4459							     shift))));
4460	    }
4461	  return 2;
4462	}
4463
4464      /* Convert
4465	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
4466	   to
4467	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4468
	  For example, r0 = r0 | 0xfff
4470	       mvn	r0, r0, lsr #12
4471	       mvn	r0, r0, asl #12
4472
4473      */
4474      if (set_zero_bit_copies > 8
4475	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4476	{
4477	  if (generate)
4478	    {
4479	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4480	      rtx shift = GEN_INT (set_zero_bit_copies);
4481
4482	      emit_constant_insn
4483		(cond,
4484		 gen_rtx_SET (VOIDmode, sub,
4485			      gen_rtx_NOT (mode,
4486					   gen_rtx_LSHIFTRT (mode,
4487							     source,
4488							     shift))));
4489	      emit_constant_insn
4490		(cond,
4491		 gen_rtx_SET (VOIDmode, target,
4492			      gen_rtx_NOT (mode,
4493					   gen_rtx_ASHIFT (mode, sub,
4494							   shift))));
4495	    }
4496	  return 2;
4497	}
4498
4499      /* This will never be reached for Thumb2 because orn is a valid
4500	 instruction. This is for Thumb1 and the ARM 32 bit cases.
4501
4502	 x = y | constant (such that ~constant is a valid constant)
4503	 Transform this to
4504	 x = ~(~y & ~constant).
4505      */
4506      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4507	{
4508	  if (generate)
4509	    {
4510	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4511	      emit_constant_insn (cond,
4512				  gen_rtx_SET (VOIDmode, sub,
4513					       gen_rtx_NOT (mode, source)));
4514	      source = sub;
4515	      if (subtargets)
4516		sub = gen_reg_rtx (mode);
4517	      emit_constant_insn (cond,
4518				  gen_rtx_SET (VOIDmode, sub,
4519					       gen_rtx_AND (mode, source,
4520							    GEN_INT (temp1))));
4521	      emit_constant_insn (cond,
4522				  gen_rtx_SET (VOIDmode, target,
4523					       gen_rtx_NOT (mode, sub)));
4524	    }
4525	  return 3;
4526	}
4527      break;
4528
4529    case AND:
      /* See if two shifts will do 2 or more insns' worth of work.  */
4531      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4532	{
4533	  HOST_WIDE_INT shift_mask = ((0xffffffff
4534				       << (32 - clear_sign_bit_copies))
4535				      & 0xffffffff);
4536
4537	  if ((remainder | shift_mask) != 0xffffffff)
4538	    {
4539	      HOST_WIDE_INT new_val
4540	        = ARM_SIGN_EXTEND (remainder | shift_mask);
4541
4542	      if (generate)
4543		{
4544		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4545		  insns = arm_gen_constant (AND, SImode, cond, new_val,
4546					    new_src, source, subtargets, 1);
4547		  source = new_src;
4548		}
4549	      else
4550		{
4551		  rtx targ = subtargets ? NULL_RTX : target;
4552		  insns = arm_gen_constant (AND, mode, cond, new_val,
4553					    targ, source, subtargets, 0);
4554		}
4555	    }
4556
4557	  if (generate)
4558	    {
4559	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4560	      rtx shift = GEN_INT (clear_sign_bit_copies);
4561
4562	      emit_insn (gen_ashlsi3 (new_src, source, shift));
4563	      emit_insn (gen_lshrsi3 (target, new_src, shift));
4564	    }
4565
4566	  return insns + 2;
4567	}
4568
4569      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4570	{
4571	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4572
4573	  if ((remainder | shift_mask) != 0xffffffff)
4574	    {
4575	      HOST_WIDE_INT new_val
4576	        = ARM_SIGN_EXTEND (remainder | shift_mask);
4577	      if (generate)
4578		{
4579		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4580
4581		  insns = arm_gen_constant (AND, mode, cond, new_val,
4582					    new_src, source, subtargets, 1);
4583		  source = new_src;
4584		}
4585	      else
4586		{
4587		  rtx targ = subtargets ? NULL_RTX : target;
4588
4589		  insns = arm_gen_constant (AND, mode, cond, new_val,
4590					    targ, source, subtargets, 0);
4591		}
4592	    }
4593
4594	  if (generate)
4595	    {
4596	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4597	      rtx shift = GEN_INT (clear_zero_bit_copies);
4598
4599	      emit_insn (gen_lshrsi3 (new_src, source, shift));
4600	      emit_insn (gen_ashlsi3 (target, new_src, shift));
4601	    }
4602
4603	  return insns + 2;
4604	}
4605
4606      break;
4607
4608    default:
4609      break;
4610    }
4611
4612  /* Calculate what the instruction sequences would be if we generated it
4613     normally, negated, or inverted.  */
4614  if (code == AND)
4615    /* AND cannot be split into multiple insns, so invert and use BIC.  */
4616    insns = 99;
4617  else
4618    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4619
4620  if (can_negate)
4621    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4622					    &neg_immediates);
4623  else
4624    neg_insns = 99;
4625
4626  if (can_invert || final_invert)
4627    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4628					    &inv_immediates);
4629  else
4630    inv_insns = 99;
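
  /* Illustrative example: "x &= 0xfffff0f0" cannot use its constant
     directly, but the inverted value 0x00000f0f splits into the two valid
     immediates 0x00000f00 and 0x0000000f, so the operation is emitted as
     two AND-with-inverted-constant (BIC-style) insns.  */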
4631
4632  immediates = &pos_immediates;
4633
4634  /* Is the negated immediate sequence more efficient?  */
4635  if (neg_insns < insns && neg_insns <= inv_insns)
4636    {
4637      insns = neg_insns;
4638      immediates = &neg_immediates;
4639    }
4640  else
4641    can_negate = 0;
4642
4643  /* Is the inverted immediate sequence more efficient?
4644     We must allow for an extra NOT instruction for XOR operations, although
4645     there is some chance that the final 'mvn' will get optimized later.  */
4646  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4647    {
4648      insns = inv_insns;
4649      immediates = &inv_immediates;
4650    }
4651  else
4652    {
4653      can_invert = 0;
4654      final_invert = 0;
4655    }
4656
4657  /* Now output the chosen sequence as instructions.  */
4658  if (generate)
4659    {
4660      for (i = 0; i < insns; i++)
4661	{
4662	  rtx new_src, temp1_rtx;
4663
4664	  temp1 = immediates->i[i];
4665
4666	  if (code == SET || code == MINUS)
4667	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
4668	  else if ((final_invert || i < (insns - 1)) && subtargets)
4669	    new_src = gen_reg_rtx (mode);
4670	  else
4671	    new_src = target;
4672
4673	  if (can_invert)
4674	    temp1 = ~temp1;
4675	  else if (can_negate)
4676	    temp1 = -temp1;
4677
4678	  temp1 = trunc_int_for_mode (temp1, mode);
4679	  temp1_rtx = GEN_INT (temp1);
4680
4681	  if (code == SET)
4682	    ;
4683	  else if (code == MINUS)
4684	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4685	  else
4686	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4687
4688	  emit_constant_insn (cond,
4689			      gen_rtx_SET (VOIDmode, new_src,
4690					   temp1_rtx));
4691	  source = new_src;
4692
4693	  if (code == SET)
4694	    {
4695	      can_negate = can_invert;
4696	      can_invert = 0;
4697	      code = PLUS;
4698	    }
4699	  else if (code == MINUS)
4700	    code = PLUS;
4701	}
4702    }
4703
4704  if (final_invert)
4705    {
4706      if (generate)
4707	emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4708					       gen_rtx_NOT (mode, source)));
4709      insns++;
4710    }
4711
4712  return insns;
4713}
4714
4715/* Canonicalize a comparison so that we are more likely to recognize it.
4716   This can be done for a few constant compares, where we can make the
4717   immediate value easier to load.  */
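
/* For example (a sketch): "x > 0x1fff" cannot use 0x1fff as an immediate,
   but it can be rewritten below as "x >= 0x2000", and 0x2000 is valid.  */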
4718
4719static void
4720arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4721			     bool op0_preserve_value)
4722{
4723  machine_mode mode;
4724  unsigned HOST_WIDE_INT i, maxval;
4725
4726  mode = GET_MODE (*op0);
4727  if (mode == VOIDmode)
4728    mode = GET_MODE (*op1);
4729
4730  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4731
4732  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
4733     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
4734     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
4735     for GTU/LEU in Thumb mode.  */
4736  if (mode == DImode)
4737    {
4738
4739      if (*code == GT || *code == LE
4740	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4741	{
4742	  /* Missing comparison.  First try to use an available
4743	     comparison.  */
4744	  if (CONST_INT_P (*op1))
4745	    {
4746	      i = INTVAL (*op1);
4747	      switch (*code)
4748		{
4749		case GT:
4750		case LE:
4751		  if (i != maxval
4752		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
4753		    {
4754		      *op1 = GEN_INT (i + 1);
4755		      *code = *code == GT ? GE : LT;
4756		      return;
4757		    }
4758		  break;
4759		case GTU:
4760		case LEU:
4761		  if (i != ~((unsigned HOST_WIDE_INT) 0)
4762		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
4763		    {
4764		      *op1 = GEN_INT (i + 1);
4765		      *code = *code == GTU ? GEU : LTU;
4766		      return;
4767		    }
4768		  break;
4769		default:
4770		  gcc_unreachable ();
4771		}
4772	    }
4773
4774	  /* If that did not work, reverse the condition.  */
4775	  if (!op0_preserve_value)
4776	    {
4777	      std::swap (*op0, *op1);
4778	      *code = (int)swap_condition ((enum rtx_code)*code);
4779	    }
4780	}
4781      return;
4782    }
4783
4784  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4785     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4786     to facilitate possible combining with a cmp into 'ands'.  */
4787  if (mode == SImode
4788      && GET_CODE (*op0) == ZERO_EXTEND
4789      && GET_CODE (XEXP (*op0, 0)) == SUBREG
4790      && GET_MODE (XEXP (*op0, 0)) == QImode
4791      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4792      && subreg_lowpart_p (XEXP (*op0, 0))
4793      && *op1 == const0_rtx)
4794    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4795			GEN_INT (255));
4796
4797  /* Comparisons smaller than DImode.  Only adjust comparisons against
4798     an out-of-range constant.  */
4799  if (!CONST_INT_P (*op1)
4800      || const_ok_for_arm (INTVAL (*op1))
4801      || const_ok_for_arm (- INTVAL (*op1)))
4802    return;
4803
4804  i = INTVAL (*op1);
4805
4806  switch (*code)
4807    {
4808    case EQ:
4809    case NE:
4810      return;
4811
4812    case GT:
4813    case LE:
4814      if (i != maxval
4815	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4816	{
4817	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4818	  *code = *code == GT ? GE : LT;
4819	  return;
4820	}
4821      break;
4822
4823    case GE:
4824    case LT:
4825      if (i != ~maxval
4826	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4827	{
4828	  *op1 = GEN_INT (i - 1);
4829	  *code = *code == GE ? GT : LE;
4830	  return;
4831	}
4832      break;
4833
4834    case GTU:
4835    case LEU:
4836      if (i != ~((unsigned HOST_WIDE_INT) 0)
4837	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4838	{
4839	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4840	  *code = *code == GTU ? GEU : LTU;
4841	  return;
4842	}
4843      break;
4844
4845    case GEU:
4846    case LTU:
4847      if (i != 0
4848	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4849	{
4850	  *op1 = GEN_INT (i - 1);
4851	  *code = *code == GEU ? GTU : LEU;
4852	  return;
4853	}
4854      break;
4855
4856    default:
4857      gcc_unreachable ();
4858    }
4859}
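
/* A purely illustrative example of the adjustment above: 0x00ffffff
   is not a valid ARM immediate (and neither is its negation), but
   0x01000000 is, so a comparison such as (GT x 0x00ffffff) is
   rewritten as (GE x 0x01000000); both test the same condition and
   the adjusted constant is usable directly as a cmp immediate.  */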
4860
4861
4862/* Define how to find the value returned by a function.  */
4863
4864static rtx
4865arm_function_value(const_tree type, const_tree func,
4866		   bool outgoing ATTRIBUTE_UNUSED)
4867{
4868  machine_mode mode;
4869  int unsignedp ATTRIBUTE_UNUSED;
4870  rtx r ATTRIBUTE_UNUSED;
4871
4872  mode = TYPE_MODE (type);
4873
4874  if (TARGET_AAPCS_BASED)
4875    return aapcs_allocate_return_reg (mode, type, func);
4876
4877  /* Promote integer types.  */
4878  if (INTEGRAL_TYPE_P (type))
4879    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4880
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
4883  if (arm_return_in_msb (type))
4884    {
4885      HOST_WIDE_INT size = int_size_in_bytes (type);
4886      if (size % UNITS_PER_WORD != 0)
4887	{
4888	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4889	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4890	}
4891    }
4892
4893  return arm_libcall_value_1 (mode);
4894}
4895
4896/* libcall hashtable helpers.  */
4897
4898struct libcall_hasher : typed_noop_remove <rtx_def>
4899{
4900  typedef rtx_def value_type;
4901  typedef rtx_def compare_type;
4902  static inline hashval_t hash (const value_type *);
4903  static inline bool equal (const value_type *, const compare_type *);
4904  static inline void remove (value_type *);
4905};
4906
4907inline bool
4908libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4909{
4910  return rtx_equal_p (p1, p2);
4911}
4912
4913inline hashval_t
4914libcall_hasher::hash (const value_type *p1)
4915{
4916  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4917}
4918
4919typedef hash_table<libcall_hasher> libcall_table_type;
4920
4921static void
4922add_libcall (libcall_table_type *htab, rtx libcall)
4923{
4924  *htab->find_slot (libcall, INSERT) = libcall;
4925}
4926
4927static bool
4928arm_libcall_uses_aapcs_base (const_rtx libcall)
4929{
4930  static bool init_done = false;
4931  static libcall_table_type *libcall_htab = NULL;
4932
4933  if (!init_done)
4934    {
4935      init_done = true;
4936
4937      libcall_htab = new libcall_table_type (31);
4938      add_libcall (libcall_htab,
4939		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4940      add_libcall (libcall_htab,
4941		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4942      add_libcall (libcall_htab,
4943		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4944      add_libcall (libcall_htab,
4945		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4946
4947      add_libcall (libcall_htab,
4948		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4949      add_libcall (libcall_htab,
4950		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4951      add_libcall (libcall_htab,
4952		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4953      add_libcall (libcall_htab,
4954		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4955
4956      add_libcall (libcall_htab,
4957		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
4958      add_libcall (libcall_htab,
4959		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4960      add_libcall (libcall_htab,
4961		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
4962      add_libcall (libcall_htab,
4963		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
4964      add_libcall (libcall_htab,
4965		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
4966      add_libcall (libcall_htab,
4967		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
4968      add_libcall (libcall_htab,
4969		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
4970      add_libcall (libcall_htab,
4971		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
4972
4973      /* Values from double-precision helper functions are returned in core
4974	 registers if the selected core only supports single-precision
4975	 arithmetic, even if we are using the hard-float ABI.  The same is
4976	 true for single-precision helpers, but we will never be using the
4977	 hard-float ABI on a CPU which doesn't support single-precision
4978	 operations in hardware.  */
4979      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4980      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4981      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4982      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4983      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4984      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4985      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4986      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4987      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4988      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4989      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4990      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4991							SFmode));
4992      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4993							DFmode));
4994    }
4995
4996  return libcall && libcall_htab->find (libcall) != NULL;
4997}
4998
4999static rtx
5000arm_libcall_value_1 (machine_mode mode)
5001{
5002  if (TARGET_AAPCS_BASED)
5003    return aapcs_libcall_value (mode);
5004  else if (TARGET_IWMMXT_ABI
5005	   && arm_vector_mode_supported_p (mode))
5006    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5007  else
5008    return gen_rtx_REG (mode, ARG_REGISTER (1));
5009}
5010
5011/* Define how to find the value returned by a library function
5012   assuming the value has mode MODE.  */
5013
5014static rtx
5015arm_libcall_value (machine_mode mode, const_rtx libcall)
5016{
5017  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5018      && GET_MODE_CLASS (mode) == MODE_FLOAT)
5019    {
5020      /* The following libcalls return their result in integer registers,
5021	 even though they return a floating point value.  */
5022      if (arm_libcall_uses_aapcs_base (libcall))
5023	return gen_rtx_REG (mode, ARG_REGISTER(1));
5024
5025    }
5026
5027  return arm_libcall_value_1 (mode);
5028}
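
/* An illustrative case for the check above, assuming a hard-float
   AAPCS configuration whose FPU supports only single precision:
   double-precision arithmetic is performed by helpers such as
   __aeabi_dadd, and because those helpers appear in the table built
   by arm_libcall_uses_aapcs_base, their DFmode result is taken from
   the core registers r0/r1 rather than from a VFP register.  */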
5029
5030/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5031
5032static bool
5033arm_function_value_regno_p (const unsigned int regno)
5034{
5035  if (regno == ARG_REGISTER (1)
5036      || (TARGET_32BIT
5037	  && TARGET_AAPCS_BASED
5038	  && TARGET_VFP
5039	  && TARGET_HARD_FLOAT
5040	  && regno == FIRST_VFP_REGNUM)
5041      || (TARGET_IWMMXT_ABI
5042	  && regno == FIRST_IWMMXT_REGNUM))
5043    return true;
5044
5045  return false;
5046}
5047
5048/* Determine the amount of memory needed to store the possible return
5049   registers of an untyped call.  */
5050int
5051arm_apply_result_size (void)
5052{
5053  int size = 16;
5054
5055  if (TARGET_32BIT)
5056    {
5057      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5058	size += 32;
5059      if (TARGET_IWMMXT_ABI)
5060	size += 8;
5061    }
5062
5063  return size;
5064}
5065
5066/* Decide whether TYPE should be returned in memory (true)
5067   or in a register (false).  FNTYPE is the type of the function making
5068   the call.  */
5069static bool
5070arm_return_in_memory (const_tree type, const_tree fntype)
5071{
5072  HOST_WIDE_INT size;
5073
5074  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5075
5076  if (TARGET_AAPCS_BASED)
5077    {
      /* Simple, non-aggregate types (i.e. not including vectors and
5079	 complex) are always returned in a register (or registers).
5080	 We don't care about which register here, so we can short-cut
5081	 some of the detail.  */
5082      if (!AGGREGATE_TYPE_P (type)
5083	  && TREE_CODE (type) != VECTOR_TYPE
5084	  && TREE_CODE (type) != COMPLEX_TYPE)
5085	return false;
5086
5087      /* Any return value that is no larger than one word can be
5088	 returned in r0.  */
5089      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5090	return false;
5091
5092      /* Check any available co-processors to see if they accept the
5093	 type as a register candidate (VFP, for example, can return
5094	 some aggregates in consecutive registers).  These aren't
5095	 available if the call is variadic.  */
5096      if (aapcs_select_return_coproc (type, fntype) >= 0)
5097	return false;
5098
5099      /* Vector values should be returned using ARM registers, not
5100	 memory (unless they're over 16 bytes, which will break since
5101	 we only have four call-clobbered registers to play with).  */
5102      if (TREE_CODE (type) == VECTOR_TYPE)
5103	return (size < 0 || size > (4 * UNITS_PER_WORD));
5104
5105      /* The rest go in memory.  */
5106      return true;
5107    }
5108
5109  if (TREE_CODE (type) == VECTOR_TYPE)
5110    return (size < 0 || size > (4 * UNITS_PER_WORD));
5111
  if (!AGGREGATE_TYPE_P (type)
      && (TREE_CODE (type) != VECTOR_TYPE))
5114    /* All simple types are returned in registers.  */
5115    return false;
5116
5117  if (arm_abi != ARM_ABI_APCS)
5118    {
5119      /* ATPCS and later return aggregate types in memory only if they are
5120	 larger than a word (or are variable size).  */
5121      return (size < 0 || size > UNITS_PER_WORD);
5122    }
5123
5124  /* For the arm-wince targets we choose to be compatible with Microsoft's
5125     ARM and Thumb compilers, which always return aggregates in memory.  */
5126#ifndef ARM_WINCE
5127  /* All structures/unions bigger than one word are returned in memory.
5128     Also catch the case where int_size_in_bytes returns -1.  In this case
5129     the aggregate is either huge or of variable size, and in either case
5130     we will want to return it via memory and not in a register.  */
5131  if (size < 0 || size > UNITS_PER_WORD)
5132    return true;
5133
5134  if (TREE_CODE (type) == RECORD_TYPE)
5135    {
5136      tree field;
5137
5138      /* For a struct the APCS says that we only return in a register
5139	 if the type is 'integer like' and every addressable element
5140	 has an offset of zero.  For practical purposes this means
5141	 that the structure can have at most one non bit-field element
5142	 and that this element must be the first one in the structure.  */
5143
5144      /* Find the first field, ignoring non FIELD_DECL things which will
5145	 have been created by C++.  */
5146      for (field = TYPE_FIELDS (type);
5147	   field && TREE_CODE (field) != FIELD_DECL;
5148	   field = DECL_CHAIN (field))
5149	continue;
5150
5151      if (field == NULL)
5152	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
5153
5154      /* Check that the first field is valid for returning in a register.  */
5155
5156      /* ... Floats are not allowed */
5157      if (FLOAT_TYPE_P (TREE_TYPE (field)))
5158	return true;
5159
5160      /* ... Aggregates that are not themselves valid for returning in
5161	 a register are not allowed.  */
5162      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5163	return true;
5164
5165      /* Now check the remaining fields, if any.  Only bitfields are allowed,
5166	 since they are not addressable.  */
5167      for (field = DECL_CHAIN (field);
5168	   field;
5169	   field = DECL_CHAIN (field))
5170	{
5171	  if (TREE_CODE (field) != FIELD_DECL)
5172	    continue;
5173
5174	  if (!DECL_BIT_FIELD_TYPE (field))
5175	    return true;
5176	}
5177
5178      return false;
5179    }
5180
5181  if (TREE_CODE (type) == UNION_TYPE)
5182    {
5183      tree field;
5184
5185      /* Unions can be returned in registers if every element is
5186	 integral, or can be returned in an integer register.  */
5187      for (field = TYPE_FIELDS (type);
5188	   field;
5189	   field = DECL_CHAIN (field))
5190	{
5191	  if (TREE_CODE (field) != FIELD_DECL)
5192	    continue;
5193
5194	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
5195	    return true;
5196
5197	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5198	    return true;
5199	}
5200
5201      return false;
5202    }
5203#endif /* not ARM_WINCE */
5204
5205  /* Return all other types in memory.  */
5206  return true;
5207}
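
/* Illustrative examples of the AAPCS return rules above:
   "struct { char c; }" fits in one word and comes back in r0;
   "struct { int a, b; }" is larger than a word and is returned in
   memory through an implicit address argument; "struct { float x, y; }"
   may instead be claimed by the VFP co-processor rules when the
   hard-float variant is in use.  */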
5208
5209const struct pcs_attribute_arg
5210{
5211  const char *arg;
5212  enum arm_pcs value;
5213} pcs_attribute_args[] =
5214  {
5215    {"aapcs", ARM_PCS_AAPCS},
5216    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5217#if 0
5218    /* We could recognize these, but changes would be needed elsewhere
5219     * to implement them.  */
5220    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5221    {"atpcs", ARM_PCS_ATPCS},
5222    {"apcs", ARM_PCS_APCS},
5223#endif
5224    {NULL, ARM_PCS_UNKNOWN}
5225  };
5226
5227static enum arm_pcs
5228arm_pcs_from_attribute (tree attr)
5229{
5230  const struct pcs_attribute_arg *ptr;
5231  const char *arg;
5232
5233  /* Get the value of the argument.  */
5234  if (TREE_VALUE (attr) == NULL_TREE
5235      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5236    return ARM_PCS_UNKNOWN;
5237
5238  arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5239
5240  /* Check it against the list of known arguments.  */
5241  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5242    if (streq (arg, ptr->arg))
5243      return ptr->value;
5244
  /* An unrecognized PCS name.  */
5246  return ARM_PCS_UNKNOWN;
5247}
5248
5249/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
5251   the call could be indirect or if this is a library call.  */
5252static enum arm_pcs
5253arm_get_pcs_model (const_tree type, const_tree decl)
5254{
5255  bool user_convention = false;
5256  enum arm_pcs user_pcs = arm_pcs_default;
5257  tree attr;
5258
5259  gcc_assert (type);
5260
5261  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5262  if (attr)
5263    {
5264      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5265      user_convention = true;
5266    }
5267
5268  if (TARGET_AAPCS_BASED)
5269    {
5270      /* Detect varargs functions.  These always use the base rules
5271	 (no argument is ever a candidate for a co-processor
5272	 register).  */
5273      bool base_rules = stdarg_p (type);
5274
5275      if (user_convention)
5276	{
5277	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5278	    sorry ("non-AAPCS derived PCS variant");
5279	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5280	    error ("variadic functions must use the base AAPCS variant");
5281	}
5282
5283      if (base_rules)
5284	return ARM_PCS_AAPCS;
5285      else if (user_convention)
5286	return user_pcs;
5287      else if (decl && flag_unit_at_a_time)
5288	{
5289	  /* Local functions never leak outside this compilation unit,
5290	     so we are free to use whatever conventions are
5291	     appropriate.  */
5292	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
5293	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5294	  if (i && i->local)
5295	    return ARM_PCS_AAPCS_LOCAL;
5296	}
5297    }
5298  else if (user_convention && user_pcs != arm_pcs_default)
5299    sorry ("PCS variant");
5300
5301  /* For everything else we use the target's default.  */
5302  return arm_pcs_default;
5303}
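
/* An illustrative use of the "pcs" attribute handled above:

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   selects the VFP variant for this one function even if the
   translation unit defaults to the base AAPCS, whereas a variadic
   declaration always falls back to the base rules.  */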
5304
5305
5306static void
5307aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
5308		    const_tree fntype ATTRIBUTE_UNUSED,
5309		    rtx libcall ATTRIBUTE_UNUSED,
5310		    const_tree fndecl ATTRIBUTE_UNUSED)
5311{
5312  /* Record the unallocated VFP registers.  */
5313  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5314  pcum->aapcs_vfp_reg_alloc = 0;
5315}
5316
5317/* Walk down the type tree of TYPE counting consecutive base elements.
5318   If *MODEP is VOIDmode, then set it to the first valid floating point
5319   type.  If a non-floating point type is found, or if a floating point
5320   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5321   otherwise return the count in the sub-tree.  */
5322static int
5323aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5324{
5325  machine_mode mode;
5326  HOST_WIDE_INT size;
5327
5328  switch (TREE_CODE (type))
5329    {
5330    case REAL_TYPE:
5331      mode = TYPE_MODE (type);
5332      if (mode != DFmode && mode != SFmode)
5333	return -1;
5334
5335      if (*modep == VOIDmode)
5336	*modep = mode;
5337
5338      if (*modep == mode)
5339	return 1;
5340
5341      break;
5342
5343    case COMPLEX_TYPE:
5344      mode = TYPE_MODE (TREE_TYPE (type));
5345      if (mode != DFmode && mode != SFmode)
5346	return -1;
5347
5348      if (*modep == VOIDmode)
5349	*modep = mode;
5350
5351      if (*modep == mode)
5352	return 2;
5353
5354      break;
5355
5356    case VECTOR_TYPE:
5357      /* Use V2SImode and V4SImode as representatives of all 64-bit
5358	 and 128-bit vector types, whether or not those modes are
5359	 supported with the present options.  */
5360      size = int_size_in_bytes (type);
5361      switch (size)
5362	{
5363	case 8:
5364	  mode = V2SImode;
5365	  break;
5366	case 16:
5367	  mode = V4SImode;
5368	  break;
5369	default:
5370	  return -1;
5371	}
5372
5373      if (*modep == VOIDmode)
5374	*modep = mode;
5375
5376      /* Vector modes are considered to be opaque: two vectors are
5377	 equivalent for the purposes of being homogeneous aggregates
5378	 if they are the same size.  */
5379      if (*modep == mode)
5380	return 1;
5381
5382      break;
5383
5384    case ARRAY_TYPE:
5385      {
5386	int count;
5387	tree index = TYPE_DOMAIN (type);
5388
5389	/* Can't handle incomplete types nor sizes that are not
5390	   fixed.  */
5391	if (!COMPLETE_TYPE_P (type)
5392	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5393	  return -1;
5394
5395	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5396	if (count == -1
5397	    || !index
5398	    || !TYPE_MAX_VALUE (index)
5399	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5400	    || !TYPE_MIN_VALUE (index)
5401	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5402	    || count < 0)
5403	  return -1;
5404
5405	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5406		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5407
5408	/* There must be no padding.  */
5409	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5410	  return -1;
5411
5412	return count;
5413      }
5414
5415    case RECORD_TYPE:
5416      {
5417	int count = 0;
5418	int sub_count;
5419	tree field;
5420
5421	/* Can't handle incomplete types nor sizes that are not
5422	   fixed.  */
5423	if (!COMPLETE_TYPE_P (type)
5424	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5425	  return -1;
5426
5427	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5428	  {
5429	    if (TREE_CODE (field) != FIELD_DECL)
5430	      continue;
5431
5432	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5433	    if (sub_count < 0)
5434	      return -1;
5435	    count += sub_count;
5436	  }
5437
5438	/* There must be no padding.  */
5439	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5440	  return -1;
5441
5442	return count;
5443      }
5444
5445    case UNION_TYPE:
5446    case QUAL_UNION_TYPE:
5447      {
5448	/* These aren't very interesting except in a degenerate case.  */
5449	int count = 0;
5450	int sub_count;
5451	tree field;
5452
5453	/* Can't handle incomplete types nor sizes that are not
5454	   fixed.  */
5455	if (!COMPLETE_TYPE_P (type)
5456	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5457	  return -1;
5458
5459	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5460	  {
5461	    if (TREE_CODE (field) != FIELD_DECL)
5462	      continue;
5463
5464	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5465	    if (sub_count < 0)
5466	      return -1;
5467	    count = count > sub_count ? count : sub_count;
5468	  }
5469
5470	/* There must be no padding.  */
5471	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5472	  return -1;
5473
5474	return count;
5475      }
5476
5477    default:
5478      break;
5479    }
5480
5481  return -1;
5482}
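
/* Illustrative candidates for the walk above (the type names are
   made up):

     struct hfa  { float x, y, z; };    three SFmode elements
     struct dhfa { double d[2]; };      two DFmode elements
     struct bad  { float f; int i; };   returns -1 (mixed element types)

   Only aggregates of one to four elements of a single floating-point
   or vector mode end up qualifying for the VFP registers.  */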
5483
5484/* Return true if PCS_VARIANT should use VFP registers.  */
5485static bool
5486use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5487{
5488  if (pcs_variant == ARM_PCS_AAPCS_VFP)
5489    {
5490      static bool seen_thumb1_vfp = false;
5491
5492      if (TARGET_THUMB1 && !seen_thumb1_vfp)
5493	{
5494	  sorry ("Thumb-1 hard-float VFP ABI");
5495	  /* sorry() is not immediately fatal, so only display this once.  */
5496	  seen_thumb1_vfp = true;
5497	}
5498
5499      return true;
5500    }
5501
5502  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5503    return false;
5504
  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
          && (TARGET_VFP_DOUBLE || !is_double));
5507}
5508
5509/* Return true if an argument whose type is TYPE, or mode is MODE, is
5510   suitable for passing or returning in VFP registers for the PCS
5511   variant selected.  If it is, then *BASE_MODE is updated to contain
5512   a machine mode describing each element of the argument's type and
5513   *COUNT to hold the number of such elements.  */
5514static bool
5515aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5516				       machine_mode mode, const_tree type,
5517				       machine_mode *base_mode, int *count)
5518{
5519  machine_mode new_mode = VOIDmode;
5520
5521  /* If we have the type information, prefer that to working things
5522     out from the mode.  */
5523  if (type)
5524    {
5525      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5526
5527      if (ag_count > 0 && ag_count <= 4)
5528	*count = ag_count;
5529      else
5530	return false;
5531    }
5532  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5533	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5534	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5535    {
5536      *count = 1;
5537      new_mode = mode;
5538    }
5539  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5540    {
5541      *count = 2;
5542      new_mode = (mode == DCmode ? DFmode : SFmode);
5543    }
5544  else
5545    return false;
5546
5547
5548  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5549    return false;
5550
5551  *base_mode = new_mode;
5552  return true;
5553}
5554
5555static bool
5556aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5557			       machine_mode mode, const_tree type)
5558{
5559  int count ATTRIBUTE_UNUSED;
5560  machine_mode ag_mode ATTRIBUTE_UNUSED;
5561
5562  if (!use_vfp_abi (pcs_variant, false))
5563    return false;
5564  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5565						&ag_mode, &count);
5566}
5567
5568static bool
5569aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5570			     const_tree type)
5571{
5572  if (!use_vfp_abi (pcum->pcs_variant, false))
5573    return false;
5574
5575  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5576						&pcum->aapcs_vfp_rmode,
5577						&pcum->aapcs_vfp_rcount);
5578}
5579
5580static bool
5581aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5582		    const_tree type  ATTRIBUTE_UNUSED)
5583{
5584  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5585  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5586  int regno;
5587
5588  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5589    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5590      {
5591	pcum->aapcs_vfp_reg_alloc = mask << regno;
5592	if (mode == BLKmode
5593	    || (mode == TImode && ! TARGET_NEON)
5594	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5595	  {
5596	    int i;
5597	    int rcount = pcum->aapcs_vfp_rcount;
5598	    int rshift = shift;
5599	    machine_mode rmode = pcum->aapcs_vfp_rmode;
5600	    rtx par;
5601	    if (!TARGET_NEON)
5602	      {
5603		/* Avoid using unsupported vector modes.  */
5604		if (rmode == V2SImode)
5605		  rmode = DImode;
5606		else if (rmode == V4SImode)
5607		  {
5608		    rmode = DImode;
5609		    rcount *= 2;
5610		    rshift /= 2;
5611		  }
5612	      }
5613	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5614	    for (i = 0; i < rcount; i++)
5615	      {
5616		rtx tmp = gen_rtx_REG (rmode,
5617				       FIRST_VFP_REGNUM + regno + i * rshift);
5618		tmp = gen_rtx_EXPR_LIST
5619		  (VOIDmode, tmp,
5620		   GEN_INT (i * GET_MODE_SIZE (rmode)));
5621		XVECEXP (par, 0, i) = tmp;
5622	      }
5623
5624	    pcum->aapcs_reg = par;
5625	  }
5626	else
5627	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5628	return true;
5629      }
5630  return false;
5631}
5632
5633static rtx
5634aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5635			       machine_mode mode,
5636			       const_tree type ATTRIBUTE_UNUSED)
5637{
5638  if (!use_vfp_abi (pcs_variant, false))
5639    return NULL;
5640
5641  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5642    {
5643      int count;
5644      machine_mode ag_mode;
5645      int i;
5646      rtx par;
5647      int shift;
5648
5649      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5650					     &ag_mode, &count);
5651
5652      if (!TARGET_NEON)
5653	{
5654	  if (ag_mode == V2SImode)
5655	    ag_mode = DImode;
5656	  else if (ag_mode == V4SImode)
5657	    {
5658	      ag_mode = DImode;
5659	      count *= 2;
5660	    }
5661	}
5662      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5663      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5664      for (i = 0; i < count; i++)
5665	{
5666	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5667	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5668				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5669	  XVECEXP (par, 0, i) = tmp;
5670	}
5671
5672      return par;
5673    }
5674
5675  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5676}
5677
5678static void
5679aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
5680		   machine_mode mode  ATTRIBUTE_UNUSED,
5681		   const_tree type  ATTRIBUTE_UNUSED)
5682{
5683  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5684  pcum->aapcs_vfp_reg_alloc = 0;
5685  return;
5686}
5687
5688#define AAPCS_CP(X)				\
5689  {						\
5690    aapcs_ ## X ## _cum_init,			\
5691    aapcs_ ## X ## _is_call_candidate,		\
5692    aapcs_ ## X ## _allocate,			\
5693    aapcs_ ## X ## _is_return_candidate,	\
5694    aapcs_ ## X ## _allocate_return_reg,	\
5695    aapcs_ ## X ## _advance			\
5696  }
5697
5698/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
5700   one co-processor table entry, but the table is processed in order
5701   and stops after the first match.  If that entry then fails to put
5702   the argument into a co-processor register, the argument will go on
5703   the stack.  */
5704static struct
5705{
5706  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
5707  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5708
5709  /* Return true if an argument of mode MODE (or type TYPE if MODE is
5710     BLKmode) is a candidate for this co-processor's registers; this
5711     function should ignore any position-dependent state in
5712     CUMULATIVE_ARGS and only use call-type dependent information.  */
5713  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5714
  /* Return true if the argument does get a co-processor register; if
     so, it should set aapcs_reg to an RTX for the allocated register,
     in the form that FUNCTION_ARG is required to return.  */
5718  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5719
5720  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
5722  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5723
  /* Allocate and return an RTX element to hold the return value of a
     call; this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
5727  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5728
5729  /* Finish processing this argument and prepare to start processing
5730     the next one.  */
5731  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5732} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5733  {
5734    AAPCS_CP(vfp)
5735  };
5736
5737#undef AAPCS_CP
5738
5739static int
5740aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5741			  const_tree type)
5742{
5743  int i;
5744
5745  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5746    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5747      return i;
5748
5749  return -1;
5750}
5751
5752static int
5753aapcs_select_return_coproc (const_tree type, const_tree fntype)
5754{
5755  /* We aren't passed a decl, so we can't check that a call is local.
5756     However, it isn't clear that that would be a win anyway, since it
5757     might limit some tail-calling opportunities.  */
5758  enum arm_pcs pcs_variant;
5759
5760  if (fntype)
5761    {
5762      const_tree fndecl = NULL_TREE;
5763
5764      if (TREE_CODE (fntype) == FUNCTION_DECL)
5765	{
5766	  fndecl = fntype;
5767	  fntype = TREE_TYPE (fntype);
5768	}
5769
5770      pcs_variant = arm_get_pcs_model (fntype, fndecl);
5771    }
5772  else
5773    pcs_variant = arm_pcs_default;
5774
5775  if (pcs_variant != ARM_PCS_AAPCS)
5776    {
5777      int i;
5778
5779      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5780	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5781							TYPE_MODE (type),
5782							type))
5783	  return i;
5784    }
5785  return -1;
5786}
5787
5788static rtx
5789aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5790			   const_tree fntype)
5791{
5792  /* We aren't passed a decl, so we can't check that a call is local.
5793     However, it isn't clear that that would be a win anyway, since it
5794     might limit some tail-calling opportunities.  */
5795  enum arm_pcs pcs_variant;
5796  int unsignedp ATTRIBUTE_UNUSED;
5797
5798  if (fntype)
5799    {
5800      const_tree fndecl = NULL_TREE;
5801
5802      if (TREE_CODE (fntype) == FUNCTION_DECL)
5803	{
5804	  fndecl = fntype;
5805	  fntype = TREE_TYPE (fntype);
5806	}
5807
5808      pcs_variant = arm_get_pcs_model (fntype, fndecl);
5809    }
5810  else
5811    pcs_variant = arm_pcs_default;
5812
5813  /* Promote integer types.  */
5814  if (type && INTEGRAL_TYPE_P (type))
5815    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5816
5817  if (pcs_variant != ARM_PCS_AAPCS)
5818    {
5819      int i;
5820
5821      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5822	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5823							type))
5824	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5825							     mode, type);
5826    }
5827
  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
5830  if (type && arm_return_in_msb (type))
5831    {
5832      HOST_WIDE_INT size = int_size_in_bytes (type);
5833      if (size % UNITS_PER_WORD != 0)
5834	{
5835	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5836	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5837	}
5838    }
5839
5840  return gen_rtx_REG (mode, R0_REGNUM);
5841}
5842
5843static rtx
5844aapcs_libcall_value (machine_mode mode)
5845{
5846  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5847      && GET_MODE_SIZE (mode) <= 4)
5848    mode = SImode;
5849
5850  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5851}
5852
5853/* Lay out a function argument using the AAPCS rules.  The rule
5854   numbers referred to here are those in the AAPCS.  */
5855static void
5856aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5857		  const_tree type, bool named)
5858{
5859  int nregs, nregs2;
5860  int ncrn;
5861
5862  /* We only need to do this once per argument.  */
5863  if (pcum->aapcs_arg_processed)
5864    return;
5865
5866  pcum->aapcs_arg_processed = true;
5867
5868  /* Special case: if named is false then we are handling an incoming
5869     anonymous argument which is on the stack.  */
5870  if (!named)
5871    return;
5872
5873  /* Is this a potential co-processor register candidate?  */
5874  if (pcum->pcs_variant != ARM_PCS_AAPCS)
5875    {
5876      int slot = aapcs_select_call_coproc (pcum, mode, type);
5877      pcum->aapcs_cprc_slot = slot;
5878
5879      /* We don't have to apply any of the rules from part B of the
         preparation phase; these are handled elsewhere in the
5881	 compiler.  */
5882
5883      if (slot >= 0)
5884	{
5885	  /* A Co-processor register candidate goes either in its own
5886	     class of registers or on the stack.  */
5887	  if (!pcum->aapcs_cprc_failed[slot])
5888	    {
5889	      /* C1.cp - Try to allocate the argument to co-processor
5890		 registers.  */
5891	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5892		return;
5893
5894	      /* C2.cp - Put the argument on the stack and note that we
5895		 can't assign any more candidates in this slot.  We also
5896		 need to note that we have allocated stack space, so that
5897		 we won't later try to split a non-cprc candidate between
5898		 core registers and the stack.  */
5899	      pcum->aapcs_cprc_failed[slot] = true;
5900	      pcum->can_split = false;
5901	    }
5902
5903	  /* We didn't get a register, so this argument goes on the
5904	     stack.  */
5905	  gcc_assert (pcum->can_split == false);
5906	  return;
5907	}
5908    }
5909
5910  /* C3 - For double-word aligned arguments, round the NCRN up to the
5911     next even number.  */
5912  ncrn = pcum->aapcs_ncrn;
5913  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5914    ncrn++;
5915
5916  nregs = ARM_NUM_REGS2(mode, type);
5917
5918  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them zero size; it
5920     then allows such a structure to be passed by value.  For some of
5921     the code below we have to pretend that such an argument has
5922     non-zero size so that we 'locate' it correctly either in
5923     registers or on the stack.  */
5924  gcc_assert (nregs >= 0);
5925
5926  nregs2 = nregs ? nregs : 1;
5927
5928  /* C4 - Argument fits entirely in core registers.  */
5929  if (ncrn + nregs2 <= NUM_ARG_REGS)
5930    {
5931      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5932      pcum->aapcs_next_ncrn = ncrn + nregs;
5933      return;
5934    }
5935
5936  /* C5 - Some core registers left and there are no arguments already
5937     on the stack: split this argument between the remaining core
5938     registers and the stack.  */
5939  if (ncrn < NUM_ARG_REGS && pcum->can_split)
5940    {
5941      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5942      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5943      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5944      return;
5945    }
5946
5947  /* C6 - NCRN is set to 4.  */
5948  pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5949
  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
5951  return;
5952}
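
/* A worked example of the rules above, purely for illustration, with
   "void f (int a, long long b, int c)" under the base AAPCS: A takes
   r0 (C4); B needs doubleword alignment, so the NCRN is rounded up
   from 1 to 2 (C3) and B occupies r2/r3 (C4); C no longer fits in
   core registers and goes on the stack (C6-C8), leaving r1 unused.  */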
5953
5954/* Initialize a variable CUM of type CUMULATIVE_ARGS
5955   for a call to a function whose data type is FNTYPE.
5956   For a library call, FNTYPE is NULL.  */
5957void
5958arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5959			  rtx libname,
5960			  tree fndecl ATTRIBUTE_UNUSED)
5961{
5962  /* Long call handling.  */
5963  if (fntype)
5964    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5965  else
5966    pcum->pcs_variant = arm_pcs_default;
5967
5968  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5969    {
5970      if (arm_libcall_uses_aapcs_base (libname))
5971	pcum->pcs_variant = ARM_PCS_AAPCS;
5972
5973      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5974      pcum->aapcs_reg = NULL_RTX;
5975      pcum->aapcs_partial = 0;
5976      pcum->aapcs_arg_processed = false;
5977      pcum->aapcs_cprc_slot = -1;
5978      pcum->can_split = true;
5979
5980      if (pcum->pcs_variant != ARM_PCS_AAPCS)
5981	{
5982	  int i;
5983
5984	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5985	    {
5986	      pcum->aapcs_cprc_failed[i] = false;
5987	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5988	    }
5989	}
5990      return;
5991    }
5992
5993  /* Legacy ABIs */
5994
5995  /* On the ARM, the offset starts at 0.  */
5996  pcum->nregs = 0;
5997  pcum->iwmmxt_nregs = 0;
5998  pcum->can_split = true;
5999
6000  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
6002  pcum->named_count = 0;
6003  pcum->nargs = 0;
6004
6005  if (TARGET_REALLY_IWMMXT && fntype)
6006    {
6007      tree fn_arg;
6008
6009      for (fn_arg = TYPE_ARG_TYPES (fntype);
6010	   fn_arg;
6011	   fn_arg = TREE_CHAIN (fn_arg))
6012	pcum->named_count += 1;
6013
6014      if (! pcum->named_count)
6015	pcum->named_count = INT_MAX;
6016    }
6017}
6018
6019/* Return true if mode/type need doubleword alignment.  */
6020static bool
6021arm_needs_doubleword_align (machine_mode mode, const_tree type)
6022{
6023  if (!type)
6024    return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6025
6026  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
6027  if (!AGGREGATE_TYPE_P (type))
6028    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6029
6030  /* Array types: Use member alignment of element type.  */
6031  if (TREE_CODE (type) == ARRAY_TYPE)
6032    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6033
6034  /* Record/aggregate types: Use greatest member alignment of any member.  */
6035  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6036    if (DECL_ALIGN (field) > PARM_BOUNDARY)
6037      return true;
6038
6039  return false;
6040}
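
/* Illustrative behaviour of the test above: PARM_BOUNDARY on ARM is
   32 bits, so "long long" and "double" arguments (64-bit natural
   alignment) report true and are placed in an even-numbered core
   register pair such as r0/r1 or r2/r3, or at an 8-byte aligned
   stack offset, while "int" and pointer arguments report false.  */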
6041
6042
6043/* Determine where to put an argument to a function.
6044   Value is zero to push the argument on the stack,
6045   or a hard register in which to store the argument.
6046
6047   MODE is the argument's machine mode.
6048   TYPE is the data type of the argument (as a tree).
6049    This is null for libcalls where that information may
6050    not be available.
6051   CUM is a variable of type CUMULATIVE_ARGS which gives info about
6052    the preceding args and about the function being called.
6053   NAMED is nonzero if this argument is a named parameter
6054    (otherwise it is an extra parameter matching an ellipsis).
6055
6056   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6057   other arguments are passed on the stack.  If (NAMED == 0) (which happens
6058   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed on the stack (function_prologue will
   indeed make it be passed on the stack if necessary).  */
6061
6062static rtx
6063arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6064		  const_tree type, bool named)
6065{
6066  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6067  int nregs;
6068
6069  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
6070     a call insn (op3 of a call_value insn).  */
6071  if (mode == VOIDmode)
6072    return const0_rtx;
6073
6074  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6075    {
6076      aapcs_layout_arg (pcum, mode, type, named);
6077      return pcum->aapcs_reg;
6078    }
6079
6080  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
6082  if (TARGET_IWMMXT_ABI
6083      && arm_vector_mode_supported_p (mode)
6084      && pcum->named_count > pcum->nargs + 1)
6085    {
6086      if (pcum->iwmmxt_nregs <= 9)
6087	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6088      else
6089	{
6090	  pcum->can_split = false;
6091	  return NULL_RTX;
6092	}
6093    }
6094
6095  /* Put doubleword aligned quantities in even register pairs.  */
6096  if (pcum->nregs & 1
6097      && ARM_DOUBLEWORD_ALIGN
6098      && arm_needs_doubleword_align (mode, type))
6099    pcum->nregs++;
6100
6101  /* Only allow splitting an arg between regs and memory if all preceding
6102     args were allocated to regs.  For args passed by reference we only count
6103     the reference pointer.  */
6104  if (pcum->can_split)
6105    nregs = 1;
6106  else
6107    nregs = ARM_NUM_REGS2 (mode, type);
6108
6109  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6110    return NULL_RTX;
6111
6112  return gen_rtx_REG (mode, pcum->nregs);
6113}
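
/* A simple illustration of the assignment above:
   "void f (int a, int b, int c, int d, int e)" passes A through D in
   r0-r3 and E on the stack, for both the AAPCS and the legacy ABIs
   handled here.  */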
6114
6115static unsigned int
6116arm_function_arg_boundary (machine_mode mode, const_tree type)
6117{
6118  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6119	  ? DOUBLEWORD_ALIGNMENT
6120	  : PARM_BOUNDARY);
6121}
6122
6123static int
6124arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6125		       tree type, bool named)
6126{
6127  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6128  int nregs = pcum->nregs;
6129
6130  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6131    {
6132      aapcs_layout_arg (pcum, mode, type, named);
6133      return pcum->aapcs_partial;
6134    }
6135
6136  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6137    return 0;
6138
6139  if (NUM_ARG_REGS > nregs
6140      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6141      && pcum->can_split)
6142    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6143
6144  return 0;
6145}
6146
6147/* Update the data in PCUM to advance over an argument
6148   of mode MODE and data type TYPE.
6149   (TYPE is null for libcalls where that information may not be available.)  */
6150
6151static void
6152arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6153			  const_tree type, bool named)
6154{
6155  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6156
6157  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6158    {
6159      aapcs_layout_arg (pcum, mode, type, named);
6160
6161      if (pcum->aapcs_cprc_slot >= 0)
6162	{
6163	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6164							      type);
6165	  pcum->aapcs_cprc_slot = -1;
6166	}
6167
6168      /* Generic stuff.  */
6169      pcum->aapcs_arg_processed = false;
6170      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6171      pcum->aapcs_reg = NULL_RTX;
6172      pcum->aapcs_partial = 0;
6173    }
6174  else
6175    {
6176      pcum->nargs += 1;
6177      if (arm_vector_mode_supported_p (mode)
6178	  && pcum->named_count > pcum->nargs
6179	  && TARGET_IWMMXT_ABI)
6180	pcum->iwmmxt_nregs += 1;
6181      else
6182	pcum->nregs += ARM_NUM_REGS2 (mode, type);
6183    }
6184}
6185
6186/* Variable sized types are passed by reference.  This is a GCC
6187   extension to the ARM ABI.  */
6188
6189static bool
6190arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6191		       machine_mode mode ATTRIBUTE_UNUSED,
6192		       const_tree type, bool named ATTRIBUTE_UNUSED)
6193{
6194  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6195}
6196
6197/* Encode the current state of the #pragma [no_]long_calls.  */
6198typedef enum
6199{
6200  OFF,		/* No #pragma [no_]long_calls is in effect.  */
6201  LONG,		/* #pragma long_calls is in effect.  */
6202  SHORT		/* #pragma no_long_calls is in effect.  */
6203} arm_pragma_enum;
6204
6205static arm_pragma_enum arm_pragma_long_calls = OFF;
6206
6207void
6208arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6209{
6210  arm_pragma_long_calls = LONG;
6211}
6212
6213void
6214arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6215{
6216  arm_pragma_long_calls = SHORT;
6217}
6218
6219void
6220arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6221{
6222  arm_pragma_long_calls = OFF;
6223}
6224
6225/* Handle an attribute requiring a FUNCTION_DECL;
6226   arguments as in struct attribute_spec.handler.  */
6227static tree
6228arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6229			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6230{
6231  if (TREE_CODE (*node) != FUNCTION_DECL)
6232    {
6233      warning (OPT_Wattributes, "%qE attribute only applies to functions",
6234	       name);
6235      *no_add_attrs = true;
6236    }
6237
6238  return NULL_TREE;
6239}
6240
6241/* Handle an "interrupt" or "isr" attribute;
6242   arguments as in struct attribute_spec.handler.  */
6243static tree
6244arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6245			  bool *no_add_attrs)
6246{
6247  if (DECL_P (*node))
6248    {
6249      if (TREE_CODE (*node) != FUNCTION_DECL)
6250	{
6251	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
6252		   name);
6253	  *no_add_attrs = true;
6254	}
6255      /* FIXME: the argument if any is checked for type attributes;
6256	 should it be checked for decl ones?  */
6257    }
6258  else
6259    {
6260      if (TREE_CODE (*node) == FUNCTION_TYPE
6261	  || TREE_CODE (*node) == METHOD_TYPE)
6262	{
6263	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6264	    {
6265	      warning (OPT_Wattributes, "%qE attribute ignored",
6266		       name);
6267	      *no_add_attrs = true;
6268	    }
6269	}
6270      else if (TREE_CODE (*node) == POINTER_TYPE
6271	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6272		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6273	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
6274	{
6275	  *node = build_variant_type_copy (*node);
6276	  TREE_TYPE (*node) = build_type_attribute_variant
6277	    (TREE_TYPE (*node),
6278	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6279	  *no_add_attrs = true;
6280	}
6281      else
6282	{
6283	  /* Possibly pass this attribute on from the type to a decl.  */
6284	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
6285		       | (int) ATTR_FLAG_FUNCTION_NEXT
6286		       | (int) ATTR_FLAG_ARRAY_NEXT))
6287	    {
6288	      *no_add_attrs = true;
6289	      return tree_cons (name, args, NULL_TREE);
6290	    }
6291	  else
6292	    {
6293	      warning (OPT_Wattributes, "%qE attribute ignored",
6294		       name);
6295	    }
6296	}
6297    }
6298
6299  return NULL_TREE;
6300}
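
/* An illustrative use of the attribute handled above (the function
   name is made up):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   marks HANDLER as an interrupt service routine so that its prologue
   and epilogue preserve the registers an IRQ handler must not
   clobber; an unrecognized argument string is diagnosed and the
   attribute is ignored.  */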
6301
6302/* Handle a "pcs" attribute; arguments as in struct
6303   attribute_spec.handler.  */
6304static tree
6305arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6306			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6307{
6308  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6309    {
6310      warning (OPT_Wattributes, "%qE attribute ignored", name);
6311      *no_add_attrs = true;
6312    }
6313  return NULL_TREE;
6314}
6315
6316#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6317/* Handle the "notshared" attribute.  This attribute is another way of
6318   requesting hidden visibility.  ARM's compiler supports
6319   "__declspec(notshared)"; we support the same thing via an
6320   attribute.  */
6321
6322static tree
6323arm_handle_notshared_attribute (tree *node,
6324				tree name ATTRIBUTE_UNUSED,
6325				tree args ATTRIBUTE_UNUSED,
6326				int flags ATTRIBUTE_UNUSED,
6327				bool *no_add_attrs)
6328{
6329  tree decl = TYPE_NAME (*node);
6330
6331  if (decl)
6332    {
6333      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6334      DECL_VISIBILITY_SPECIFIED (decl) = 1;
6335      *no_add_attrs = false;
6336    }
6337  return NULL_TREE;
6338}
6339#endif
6340
6341/* Return 0 if the attributes for two types are incompatible, 1 if they
6342   are compatible, and 2 if they are nearly compatible (which causes a
6343   warning to be generated).  */
6344static int
6345arm_comp_type_attributes (const_tree type1, const_tree type2)
6346{
6347  int l1, l2, s1, s2;
6348
6349  /* Check for mismatch of non-default calling convention.  */
6350  if (TREE_CODE (type1) != FUNCTION_TYPE)
6351    return 1;
6352
6353  /* Check for mismatched call attributes.  */
6354  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6355  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6356  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6357  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6358
6359  /* Only bother to check if an attribute is defined.  */
6360  if (l1 | l2 | s1 | s2)
6361    {
6362      /* If one type has an attribute, the other must have the same attribute.  */
6363      if ((l1 != l2) || (s1 != s2))
6364	return 0;
6365
6366      /* Disallow mixed attributes.  */
6367      if ((l1 & s2) || (l2 & s1))
6368	return 0;
6369    }
6370
6371  /* Check for mismatched ISR attribute.  */
6372  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6373  if (! l1)
6374    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6375  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6376  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6378  if (l1 != l2)
6379    return 0;
6380
6381  return 1;
6382}
6383
6384/*  Assigns default attributes to newly defined type.  This is used to
6385    set short_call/long_call attributes for function types of
6386    functions defined inside corresponding #pragma scopes.  */
6387static void
6388arm_set_default_type_attributes (tree type)
6389{
  /* Add __attribute__ ((long_call)) to all functions when inside
     #pragma long_calls, or __attribute__ ((short_call)) when inside
     #pragma no_long_calls.  */
6393  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6394    {
6395      tree type_attr_list, attr_name;
6396      type_attr_list = TYPE_ATTRIBUTES (type);
6397
6398      if (arm_pragma_long_calls == LONG)
6399 	attr_name = get_identifier ("long_call");
6400      else if (arm_pragma_long_calls == SHORT)
6401 	attr_name = get_identifier ("short_call");
6402      else
6403 	return;
6404
6405      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6406      TYPE_ATTRIBUTES (type) = type_attr_list;
6407    }
6408}
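
/* An illustrative use of the pragma scopes handled above (the
   function names are made up):

     #pragma long_calls
     void far_func (void);       <- type gains "long_call"
     #pragma long_calls_off
     void near_func (void);      <- no call-type attribute added

   #pragma no_long_calls would likewise add "short_call" to function
   types declared within its scope.  */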
6409
6410/* Return true if DECL is known to be linked into section SECTION.  */
6411
6412static bool
6413arm_function_in_section_p (tree decl, section *section)
6414{
6415  /* We can only be certain about the prevailing symbol definition.  */
6416  if (!decl_binds_to_current_def_p (decl))
6417    return false;
6418
6419  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
6420  if (!DECL_SECTION_NAME (decl))
6421    {
6422      /* Make sure that we will not create a unique section for DECL.  */
6423      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6424	return false;
6425    }
6426
6427  return function_section (decl) == section;
6428}
6429
6430/* Return nonzero if a 32-bit "long_call" should be generated for
6431   a call from the current function to DECL.  We generate a long_call
6432   if the function:
6433
        a.  has an __attribute__ ((long_call))
6435     or b.  is within the scope of a #pragma long_calls
6436     or c.  the -mlong-calls command line switch has been specified
6437
6438   However we do not generate a long call if the function:
6439
6440        d.  has an __attribute__ ((short_call))
6441     or e.  is inside the scope of a #pragma no_long_calls
6442     or f.  is defined in the same section as the current function.  */
6443
6444bool
6445arm_is_long_call_p (tree decl)
6446{
6447  tree attrs;
6448
6449  if (!decl)
6450    return TARGET_LONG_CALLS;
6451
6452  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6453  if (lookup_attribute ("short_call", attrs))
6454    return false;
6455
6456  /* For "f", be conservative, and only cater for cases in which the
6457     whole of the current function is placed in the same section.  */
6458  if (!flag_reorder_blocks_and_partition
6459      && TREE_CODE (decl) == FUNCTION_DECL
6460      && arm_function_in_section_p (decl, current_function_section ()))
6461    return false;
6462
6463  if (lookup_attribute ("long_call", attrs))
6464    return true;
6465
6466  return TARGET_LONG_CALLS;
6467}
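
/* Illustrative usage: with -mlong-calls, any call not proved to land
   in the current section is emitted as an address load plus an
   indirect branch, while individual functions can opt in or out
   explicitly (the names below are made up):

     void far_away (void) __attribute__ ((long_call));
     void near_by (void) __attribute__ ((short_call));  */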
6468
6469/* Return nonzero if it is ok to make a tail-call to DECL.  */
6470static bool
6471arm_function_ok_for_sibcall (tree decl, tree exp)
6472{
6473  unsigned long func_type;
6474
6475  if (cfun->machine->sibcall_blocked)
6476    return false;
6477
6478  /* Never tailcall something if we are generating code for Thumb-1.  */
6479  if (TARGET_THUMB1)
6480    return false;
6481
6482  /* The PIC register is live on entry to VxWorks PLT entries, so we
6483     must make the call before restoring the PIC register.  */
6484  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6485    return false;
6486
6487  /* If we are interworking and the function is not declared static
6488     then we can't tail-call it unless we know that it exists in this
6489     compilation unit (since it might be a Thumb routine).  */
6490  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6491      && !TREE_ASM_WRITTEN (decl))
6492    return false;
6493
6494  func_type = arm_current_func_type ();
6495  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
6496  if (IS_INTERRUPT (func_type))
6497    return false;
6498
6499  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6500    {
6501      /* Check that the return value locations are the same.  For
6502	 example that we aren't returning a value from the sibling in
6503	 a VFP register but then need to transfer it to a core
6504	 register.  */
6505      rtx a, b;
6506
6507      a = arm_function_value (TREE_TYPE (exp), decl, false);
6508      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6509			      cfun->decl, false);
6510      if (!rtx_equal_p (a, b))
6511	return false;
6512    }
6513
6514  /* Never tailcall if function may be called with a misaligned SP.  */
6515  if (IS_STACKALIGN (func_type))
6516    return false;
6517
6518  /* The AAPCS says that, on bare-metal, calls to unresolved weak
6519     references should become a NOP.  Don't convert such calls into
6520     sibling calls.  */
6521  if (TARGET_AAPCS_BASED
6522      && arm_abi == ARM_ABI_AAPCS
6523      && decl
6524      && DECL_WEAK (decl))
6525    return false;
6526
6527  /* Everything else is ok.  */
6528  return true;
6529}
6530
6531
6532/* Addressing mode support functions.  */
6533
6534/* Return nonzero if X is a legitimate immediate operand when compiling
6535   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
6536int
6537legitimate_pic_operand_p (rtx x)
6538{
6539  if (GET_CODE (x) == SYMBOL_REF
6540      || (GET_CODE (x) == CONST
6541	  && GET_CODE (XEXP (x, 0)) == PLUS
6542	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6543    return 0;
6544
6545  return 1;
6546}
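
/* For illustration (not from the original sources): when generating PIC,
   (symbol_ref "foo") and (const (plus (symbol_ref "foo") (const_int 8)))
   are rejected by the test above, since symbolic addresses generally have
   to go through the PIC machinery, whereas plain integers such as
   (const_int 42) remain legitimate immediates.  */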
6547
6548/* Record that the current function needs a PIC register.  Initialize
6549   cfun->machine->pic_reg if we have not already done so.  */
6550
6551static void
6552require_pic_register (void)
6553{
6554  /* A lot of the logic here is made obscure by the fact that this
6555     routine gets called as part of the rtx cost estimation process.
6556     We don't want those calls to affect any assumptions about the real
6557     function; and further, we can't call entry_of_function() until we
6558     start the real expansion process.  */
6559  if (!crtl->uses_pic_offset_table)
6560    {
6561      gcc_assert (can_create_pseudo_p ());
6562      if (arm_pic_register != INVALID_REGNUM
6563	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6564	{
6565	  if (!cfun->machine->pic_reg)
6566	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6567
6568	  /* Play games to avoid marking the function as needing pic
6569	     if we are being called as part of the cost-estimation
6570	     process.  */
6571	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6572	    crtl->uses_pic_offset_table = 1;
6573	}
6574      else
6575	{
6576	  rtx_insn *seq, *insn;
6577
6578	  if (!cfun->machine->pic_reg)
6579	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6580
6581	  /* Play games to avoid marking the function as needing pic
6582	     if we are being called as part of the cost-estimation
6583	     process.  */
6584	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6585	    {
6586	      crtl->uses_pic_offset_table = 1;
6587	      start_sequence ();
6588
6589	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6590		  && arm_pic_register > LAST_LO_REGNUM)
6591		emit_move_insn (cfun->machine->pic_reg,
6592				gen_rtx_REG (Pmode, arm_pic_register));
6593	      else
6594		arm_load_pic_register (0UL);
6595
6596	      seq = get_insns ();
6597	      end_sequence ();
6598
6599	      for (insn = seq; insn; insn = NEXT_INSN (insn))
6600		if (INSN_P (insn))
6601		  INSN_LOCATION (insn) = prologue_location;
6602
	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge; they will
		 be committed after everything else is expanded.  */
6607	      insert_insn_on_edge (seq,
6608				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6609	    }
6610	}
6611    }
6612}
6613
6614rtx
6615legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6616{
6617  if (GET_CODE (orig) == SYMBOL_REF
6618      || GET_CODE (orig) == LABEL_REF)
6619    {
6620      rtx insn;
6621
6622      if (reg == 0)
6623	{
6624	  gcc_assert (can_create_pseudo_p ());
6625	  reg = gen_reg_rtx (Pmode);
6626	}
6627
6628      /* VxWorks does not impose a fixed gap between segments; the run-time
6629	 gap can be different from the object-file gap.  We therefore can't
6630	 use GOTOFF unless we are absolutely sure that the symbol is in the
6631	 same segment as the GOT.  Unfortunately, the flexibility of linker
6632	 scripts means that we can't be sure of that in general, so assume
6633	 that GOTOFF is never valid on VxWorks.  */
6634      if ((GET_CODE (orig) == LABEL_REF
6635	   || (GET_CODE (orig) == SYMBOL_REF &&
6636	       SYMBOL_REF_LOCAL_P (orig)))
6637	  && NEED_GOT_RELOC
6638	  && arm_pic_data_is_text_relative)
6639	insn = arm_pic_static_addr (orig, reg);
6640      else
6641	{
6642	  rtx pat;
6643	  rtx mem;
6644
6645	  /* If this function doesn't have a pic register, create one now.  */
6646	  require_pic_register ();
6647
6648	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6649
6650	  /* Make the MEM as close to a constant as possible.  */
6651	  mem = SET_SRC (pat);
6652	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6653	  MEM_READONLY_P (mem) = 1;
6654	  MEM_NOTRAP_P (mem) = 1;
6655
6656	  insn = emit_insn (pat);
6657	}
6658
6659      /* Put a REG_EQUAL note on this insn, so that it can be optimized
6660	 by loop.  */
6661      set_unique_reg_note (insn, REG_EQUAL, orig);
6662
6663      return reg;
6664    }
6665  else if (GET_CODE (orig) == CONST)
6666    {
6667      rtx base, offset;
6668
6669      if (GET_CODE (XEXP (orig, 0)) == PLUS
6670	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6671	return orig;
6672
6673      /* Handle the case where we have: const (UNSPEC_TLS).  */
6674      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6675	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6676	return orig;
6677
6678      /* Handle the case where we have:
6679         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
6680         CONST_INT.  */
6681      if (GET_CODE (XEXP (orig, 0)) == PLUS
6682          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6683          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6684        {
6685	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6686	  return orig;
6687	}
6688
6689      if (reg == 0)
6690	{
6691	  gcc_assert (can_create_pseudo_p ());
6692	  reg = gen_reg_rtx (Pmode);
6693	}
6694
6695      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6696
6697      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6698      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6699				       base == reg ? 0 : reg);
6700
6701      if (CONST_INT_P (offset))
6702	{
6703	  /* The base register doesn't really matter, we only want to
6704	     test the index for the appropriate mode.  */
6705	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
6706	    {
6707	      gcc_assert (can_create_pseudo_p ());
6708	      offset = force_reg (Pmode, offset);
6709	    }
6710
6711	  if (CONST_INT_P (offset))
6712	    return plus_constant (Pmode, base, INTVAL (offset));
6713	}
6714
6715      if (GET_MODE_SIZE (mode) > 4
6716	  && (GET_MODE_CLASS (mode) == MODE_INT
6717	      || TARGET_SOFT_FLOAT))
6718	{
6719	  emit_insn (gen_addsi3 (reg, base, offset));
6720	  return reg;
6721	}
6722
6723      return gen_rtx_PLUS (Pmode, base, offset);
6724    }
6725
6726  return orig;
6727}
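
/* Illustrative sketch (not from the original sources): for a global symbol
   under PIC the code above typically produces a GOT-indirect load, roughly

     (set (reg Rt) (mem (plus (reg pic_reg)
			      (unspec [(symbol_ref "foo")] UNSPEC_PIC_SYM))))

   with a REG_EQUAL note of (symbol_ref "foo"), while local symbols and
   labels may instead be addressed pc-relatively via arm_pic_static_addr.  */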
6728
6729
6730/* Find a spare register to use during the prolog of a function.  */
6731
6732static int
6733thumb_find_work_register (unsigned long pushed_regs_mask)
6734{
6735  int reg;
6736
6737  /* Check the argument registers first as these are call-used.  The
6738     register allocation order means that sometimes r3 might be used
6739     but earlier argument registers might not, so check them all.  */
6740  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6741    if (!df_regs_ever_live_p (reg))
6742      return reg;
6743
6744  /* Before going on to check the call-saved registers we can try a couple
6745     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have fewer than 4
     registers' worth of fixed arguments (*).  In this case r3 will be part of
6748     the variable argument list and so we can be sure that it will be
6749     pushed right at the start of the function.  Hence it will be available
6750     for the rest of the prologue.
     (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
6752  if (cfun->machine->uses_anonymous_args
6753      && crtl->args.pretend_args_size > 0)
6754    return LAST_ARG_REGNUM;
6755
  /* The other case is when we have fixed arguments but fewer than 4 registers'
     worth.  In this case r3 might be used in the body of the function, but
6758     it is not being used to convey an argument into the function.  In theory
6759     we could just check crtl->args.size to see how many bytes are
6760     being passed in argument registers, but it seems that it is unreliable.
6761     Sometimes it will have the value 0 when in fact arguments are being
6762     passed.  (See testcase execute/20021111-1.c for an example).  So we also
6763     check the args_info.nregs field as well.  The problem with this field is
6764     that it makes no allowances for arguments that are passed to the
6765     function but which are not used.  Hence we could miss an opportunity
6766     when a function has an unused argument in r3.  But it is better to be
6767     safe than to be sorry.  */
6768  if (! cfun->machine->uses_anonymous_args
6769      && crtl->args.size >= 0
6770      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6771      && (TARGET_AAPCS_BASED
6772	  ? crtl->args.info.aapcs_ncrn < 4
6773	  : crtl->args.info.nregs < 4))
6774    return LAST_ARG_REGNUM;
6775
6776  /* Otherwise look for a call-saved register that is going to be pushed.  */
6777  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6778    if (pushed_regs_mask & (1 << reg))
6779      return reg;
6780
6781  if (TARGET_THUMB2)
6782    {
6783      /* Thumb-2 can use high regs.  */
6784      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6785	if (pushed_regs_mask & (1 << reg))
6786	  return reg;
6787    }
6788  /* Something went wrong - thumb_compute_save_reg_mask()
6789     should have arranged for a suitable register to be pushed.  */
6790  gcc_unreachable ();
6791}
6792
6793static GTY(()) int pic_labelno;
6794
/* Generate code to load the PIC register.  In Thumb mode, SAVED_REGS is the
   mask of registers pushed in the prologue; it is used to find a spare low
   register when one is needed.  */
6797
6798void
6799arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6800{
6801  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6802
6803  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6804    return;
6805
6806  gcc_assert (flag_pic);
6807
6808  pic_reg = cfun->machine->pic_reg;
6809  if (TARGET_VXWORKS_RTP)
6810    {
6811      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6812      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6813      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6814
6815      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6816
6817      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6818      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6819    }
6820  else
6821    {
6822      /* We use an UNSPEC rather than a LABEL_REF because this label
6823	 never appears in the code stream.  */
6824
6825      labelno = GEN_INT (pic_labelno++);
6826      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6827      l1 = gen_rtx_CONST (VOIDmode, l1);
6828
6829      /* On the ARM the PC register contains 'dot + 8' at the time of the
6830	 addition, on the Thumb it is 'dot + 4'.  */
6831      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6832      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6833				UNSPEC_GOTSYM_OFF);
6834      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6835
6836      if (TARGET_32BIT)
6837	{
6838	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6839	}
6840      else /* TARGET_THUMB1 */
6841	{
6842	  if (arm_pic_register != INVALID_REGNUM
6843	      && REGNO (pic_reg) > LAST_LO_REGNUM)
6844	    {
6845	      /* We will have pushed the pic register, so we should always be
6846		 able to find a work register.  */
6847	      pic_tmp = gen_rtx_REG (SImode,
6848				     thumb_find_work_register (saved_regs));
6849	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6850	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6851	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6852	    }
6853	  else if (arm_pic_register != INVALID_REGNUM
6854		   && arm_pic_register > LAST_LO_REGNUM
6855		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
6856	    {
6857	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6858	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6859	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6860	    }
6861	  else
6862	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6863	}
6864    }
6865
6866  /* Need to emit this whether or not we obey regdecls,
6867     since setjmp/longjmp can cause life info to screw up.  */
6868  emit_use (pic_reg);
6869}
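
/* Illustrative sketch (not from the original sources): in ARM state the
   sequence built above conceptually assembles to something like

	ldr	rPIC, .LCn	@ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPICm+8)
     .LPICm:
	add	rPIC, pc, rPIC

   i.e. a pc-relative computation of the GOT address, with the 8 (ARM) or
   4 (Thumb) accounting for the pipeline offset of the PC noted above.  */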
6870
6871/* Generate code to load the address of a static var when flag_pic is set.  */
6872static rtx
6873arm_pic_static_addr (rtx orig, rtx reg)
6874{
6875  rtx l1, labelno, offset_rtx, insn;
6876
6877  gcc_assert (flag_pic);
6878
6879  /* We use an UNSPEC rather than a LABEL_REF because this label
6880     never appears in the code stream.  */
6881  labelno = GEN_INT (pic_labelno++);
6882  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6883  l1 = gen_rtx_CONST (VOIDmode, l1);
6884
6885  /* On the ARM the PC register contains 'dot + 8' at the time of the
6886     addition, on the Thumb it is 'dot + 4'.  */
6887  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6888  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6889                               UNSPEC_SYMBOL_OFFSET);
6890  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6891
6892  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6893  return insn;
6894}
6895
6896/* Return nonzero if X is valid as an ARM state addressing register.  */
6897static int
6898arm_address_register_rtx_p (rtx x, int strict_p)
6899{
6900  int regno;
6901
6902  if (!REG_P (x))
6903    return 0;
6904
6905  regno = REGNO (x);
6906
6907  if (strict_p)
6908    return ARM_REGNO_OK_FOR_BASE_P (regno);
6909
6910  return (regno <= LAST_ARM_REGNUM
6911	  || regno >= FIRST_PSEUDO_REGISTER
6912	  || regno == FRAME_POINTER_REGNUM
6913	  || regno == ARG_POINTER_REGNUM);
6914}
6915
6916/* Return TRUE if this rtx is the difference of a symbol and a label,
6917   and will reduce to a PC-relative relocation in the object file.
6918   Expressions like this can be left alone when generating PIC, rather
6919   than forced through the GOT.  */
6920static int
6921pcrel_constant_p (rtx x)
6922{
6923  if (GET_CODE (x) == MINUS)
6924    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6925
6926  return FALSE;
6927}
6928
6929/* Return true if X will surely end up in an index register after next
/* Return true if X will surely end up in an index register after the next
6931static bool
6932will_be_in_index_register (const_rtx x)
6933{
6934  /* arm.md: calculate_pic_address will split this into a register.  */
6935  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6936}
6937
6938/* Return nonzero if X is a valid ARM state address operand.  */
6939int
6940arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6941			        int strict_p)
6942{
6943  bool use_ldrd;
6944  enum rtx_code code = GET_CODE (x);
6945
6946  if (arm_address_register_rtx_p (x, strict_p))
6947    return 1;
6948
6949  use_ldrd = (TARGET_LDRD
6950	      && (mode == DImode
6951		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6952
6953  if (code == POST_INC || code == PRE_DEC
6954      || ((code == PRE_INC || code == POST_DEC)
6955	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6956    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6957
6958  else if ((code == POST_MODIFY || code == PRE_MODIFY)
6959	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6960	   && GET_CODE (XEXP (x, 1)) == PLUS
6961	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6962    {
6963      rtx addend = XEXP (XEXP (x, 1), 1);
6964
      /* Don't allow ldrd post-increment by register because it's hard
	 to fix up invalid register choices.  */
6967      if (use_ldrd
6968	  && GET_CODE (x) == POST_MODIFY
6969	  && REG_P (addend))
6970	return 0;
6971
6972      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6973	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
6974    }
6975
6976  /* After reload constants split into minipools will have addresses
6977     from a LABEL_REF.  */
6978  else if (reload_completed
6979	   && (code == LABEL_REF
6980	       || (code == CONST
6981		   && GET_CODE (XEXP (x, 0)) == PLUS
6982		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6983		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6984    return 1;
6985
6986  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6987    return 0;
6988
6989  else if (code == PLUS)
6990    {
6991      rtx xop0 = XEXP (x, 0);
6992      rtx xop1 = XEXP (x, 1);
6993
6994      return ((arm_address_register_rtx_p (xop0, strict_p)
6995	       && ((CONST_INT_P (xop1)
6996		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6997		   || (!strict_p && will_be_in_index_register (xop1))))
6998	      || (arm_address_register_rtx_p (xop1, strict_p)
6999		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7000    }
7001
7002#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
7004  else if (GET_CODE (x) == MINUS)
7005    {
7006      rtx xop0 = XEXP (x, 0);
7007      rtx xop1 = XEXP (x, 1);
7008
7009      return (arm_address_register_rtx_p (xop0, strict_p)
7010	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7011    }
7012#endif
7013
7014  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7015	   && code == SYMBOL_REF
7016	   && CONSTANT_POOL_ADDRESS_P (x)
7017	   && ! (flag_pic
7018		 && symbol_mentioned_p (get_pool_constant (x))
7019		 && ! pcrel_constant_p (get_pool_constant (x))))
7020    return 1;
7021
7022  return 0;
7023}
7024
7025/* Return nonzero if X is a valid Thumb-2 address operand.  */
7026static int
7027thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7028{
7029  bool use_ldrd;
7030  enum rtx_code code = GET_CODE (x);
7031
7032  if (arm_address_register_rtx_p (x, strict_p))
7033    return 1;
7034
7035  use_ldrd = (TARGET_LDRD
7036	      && (mode == DImode
7037		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7038
7039  if (code == POST_INC || code == PRE_DEC
7040      || ((code == PRE_INC || code == POST_DEC)
7041	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7042    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7043
7044  else if ((code == POST_MODIFY || code == PRE_MODIFY)
7045	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7046	   && GET_CODE (XEXP (x, 1)) == PLUS
7047	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7048    {
7049      /* Thumb-2 only has autoincrement by constant.  */
7050      rtx addend = XEXP (XEXP (x, 1), 1);
7051      HOST_WIDE_INT offset;
7052
7053      if (!CONST_INT_P (addend))
7054	return 0;
7055
      offset = INTVAL (addend);
7057      if (GET_MODE_SIZE (mode) <= 4)
7058	return (offset > -256 && offset < 256);
7059
7060      return (use_ldrd && offset > -1024 && offset < 1024
7061	      && (offset & 3) == 0);
7062    }
7063
7064  /* After reload constants split into minipools will have addresses
7065     from a LABEL_REF.  */
7066  else if (reload_completed
7067	   && (code == LABEL_REF
7068	       || (code == CONST
7069		   && GET_CODE (XEXP (x, 0)) == PLUS
7070		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7071		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7072    return 1;
7073
7074  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7075    return 0;
7076
7077  else if (code == PLUS)
7078    {
7079      rtx xop0 = XEXP (x, 0);
7080      rtx xop1 = XEXP (x, 1);
7081
7082      return ((arm_address_register_rtx_p (xop0, strict_p)
7083	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7084		   || (!strict_p && will_be_in_index_register (xop1))))
7085	      || (arm_address_register_rtx_p (xop1, strict_p)
7086		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7087    }
7088
  /* Normally we can assign constant values to target registers without
     the help of the constant pool.  But there are cases where we have to
     use the constant pool, for example:
     1) assigning a label to a register;
     2) sign-extending an 8-bit value to 32 bits and then assigning it
	to a register.

     A constant pool access of the form:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of the literal pool (later, in arm_reorg).
     So here we mark such a form as invalid, and the compiler will then
     adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
7104  else if (arm_disable_literal_pool && code == SYMBOL_REF
7105	   && CONSTANT_POOL_ADDRESS_P (x))
7106    return 0;
7107
7108  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7109	   && code == SYMBOL_REF
7110	   && CONSTANT_POOL_ADDRESS_P (x)
7111	   && ! (flag_pic
7112		 && symbol_mentioned_p (get_pool_constant (x))
7113		 && ! pcrel_constant_p (get_pool_constant (x))))
7114    return 1;
7115
7116  return 0;
7117}
7118
7119/* Return nonzero if INDEX is valid for an address index operand in
7120   ARM state.  */
7121static int
7122arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7123			int strict_p)
7124{
7125  HOST_WIDE_INT range;
7126  enum rtx_code code = GET_CODE (index);
7127
7128  /* Standard coprocessor addressing modes.  */
7129  if (TARGET_HARD_FLOAT
7130      && TARGET_VFP
7131      && (mode == SFmode || mode == DFmode))
7132    return (code == CONST_INT && INTVAL (index) < 1024
7133	    && INTVAL (index) > -1024
7134	    && (INTVAL (index) & 3) == 0);
7135
7136  /* For quad modes, we restrict the constant offset to be slightly less
7137     than what the instruction format permits.  We do this because for
7138     quad mode moves, we will actually decompose them into two separate
7139     double-mode reads or writes.  INDEX must therefore be a valid
7140     (double-mode) offset and so should INDEX+8.  */
7141  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7142    return (code == CONST_INT
7143	    && INTVAL (index) < 1016
7144	    && INTVAL (index) > -1024
7145	    && (INTVAL (index) & 3) == 0);
7146
7147  /* We have no such constraint on double mode offsets, so we permit the
7148     full range of the instruction format.  */
7149  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7150    return (code == CONST_INT
7151	    && INTVAL (index) < 1024
7152	    && INTVAL (index) > -1024
7153	    && (INTVAL (index) & 3) == 0);
7154
7155  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7156    return (code == CONST_INT
7157	    && INTVAL (index) < 1024
7158	    && INTVAL (index) > -1024
7159	    && (INTVAL (index) & 3) == 0);
7160
7161  if (arm_address_register_rtx_p (index, strict_p)
7162      && (GET_MODE_SIZE (mode) <= 4))
7163    return 1;
7164
7165  if (mode == DImode || mode == DFmode)
7166    {
7167      if (code == CONST_INT)
7168	{
7169	  HOST_WIDE_INT val = INTVAL (index);
7170
7171	  if (TARGET_LDRD)
7172	    return val > -256 && val < 256;
7173	  else
7174	    return val > -4096 && val < 4092;
7175	}
7176
7177      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7178    }
7179
7180  if (GET_MODE_SIZE (mode) <= 4
7181      && ! (arm_arch4
7182	    && (mode == HImode
7183		|| mode == HFmode
7184		|| (mode == QImode && outer == SIGN_EXTEND))))
7185    {
7186      if (code == MULT)
7187	{
7188	  rtx xiop0 = XEXP (index, 0);
7189	  rtx xiop1 = XEXP (index, 1);
7190
7191	  return ((arm_address_register_rtx_p (xiop0, strict_p)
7192		   && power_of_two_operand (xiop1, SImode))
7193		  || (arm_address_register_rtx_p (xiop1, strict_p)
7194		      && power_of_two_operand (xiop0, SImode)));
7195	}
7196      else if (code == LSHIFTRT || code == ASHIFTRT
7197	       || code == ASHIFT || code == ROTATERT)
7198	{
7199	  rtx op = XEXP (index, 1);
7200
7201	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7202		  && CONST_INT_P (op)
7203		  && INTVAL (op) > 0
7204		  && INTVAL (op) <= 31);
7205	}
7206    }
7207
7208  /* For ARM v4 we may be doing a sign-extend operation during the
7209     load.  */
7210  if (arm_arch4)
7211    {
7212      if (mode == HImode
7213	  || mode == HFmode
7214	  || (outer == SIGN_EXTEND && mode == QImode))
7215	range = 256;
7216      else
7217	range = 4096;
7218    }
7219  else
7220    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7221
7222  return (code == CONST_INT
7223	  && INTVAL (index) < range
7224	  && INTVAL (index) > -range);
7225}
7226
/* Return true if OP is a valid scaling factor for a Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
7229static bool
7230thumb2_index_mul_operand (rtx op)
7231{
7232  HOST_WIDE_INT val;
7233
7234  if (!CONST_INT_P (op))
7235    return false;
7236
  val = INTVAL (op);
7238  return (val == 1 || val == 2 || val == 4 || val == 8);
7239}
7240
7241/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
7242static int
7243thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7244{
7245  enum rtx_code code = GET_CODE (index);
7246
7247  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
7248  /* Standard coprocessor addressing modes.  */
7249  if (TARGET_HARD_FLOAT
7250      && TARGET_VFP
7251      && (mode == SFmode || mode == DFmode))
7252    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
7254	       load/stores. Since we allow SF/DF in core registers, we have
7255	       to use the intersection between -256~4096 (core) and -1024~1024
7256	       (coprocessor).  */
7257	    && INTVAL (index) > -256
7258	    && (INTVAL (index) & 3) == 0);
7259
7260  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7261    {
7262      /* For DImode assume values will usually live in core regs
7263	 and only allow LDRD addressing modes.  */
7264      if (!TARGET_LDRD || mode != DImode)
7265	return (code == CONST_INT
7266		&& INTVAL (index) < 1024
7267		&& INTVAL (index) > -1024
7268		&& (INTVAL (index) & 3) == 0);
7269    }
7270
7271  /* For quad modes, we restrict the constant offset to be slightly less
7272     than what the instruction format permits.  We do this because for
7273     quad mode moves, we will actually decompose them into two separate
7274     double-mode reads or writes.  INDEX must therefore be a valid
7275     (double-mode) offset and so should INDEX+8.  */
7276  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7277    return (code == CONST_INT
7278	    && INTVAL (index) < 1016
7279	    && INTVAL (index) > -1024
7280	    && (INTVAL (index) & 3) == 0);
7281
7282  /* We have no such constraint on double mode offsets, so we permit the
7283     full range of the instruction format.  */
7284  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7285    return (code == CONST_INT
7286	    && INTVAL (index) < 1024
7287	    && INTVAL (index) > -1024
7288	    && (INTVAL (index) & 3) == 0);
7289
7290  if (arm_address_register_rtx_p (index, strict_p)
7291      && (GET_MODE_SIZE (mode) <= 4))
7292    return 1;
7293
7294  if (mode == DImode || mode == DFmode)
7295    {
7296      if (code == CONST_INT)
7297	{
7298	  HOST_WIDE_INT val = INTVAL (index);
7299	  /* ??? Can we assume ldrd for thumb2?  */
7300	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
7301	  /* ldrd supports offsets of +-1020.
7302	     However the ldr fallback does not.  */
7303	  return val > -256 && val < 256 && (val & 3) == 0;
7304	}
7305      else
7306	return 0;
7307    }
7308
7309  if (code == MULT)
7310    {
7311      rtx xiop0 = XEXP (index, 0);
7312      rtx xiop1 = XEXP (index, 1);
7313
7314      return ((arm_address_register_rtx_p (xiop0, strict_p)
7315	       && thumb2_index_mul_operand (xiop1))
7316	      || (arm_address_register_rtx_p (xiop1, strict_p)
7317		  && thumb2_index_mul_operand (xiop0)));
7318    }
7319  else if (code == ASHIFT)
7320    {
7321      rtx op = XEXP (index, 1);
7322
7323      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7324	      && CONST_INT_P (op)
7325	      && INTVAL (op) > 0
7326	      && INTVAL (op) <= 3);
7327    }
7328
7329  return (code == CONST_INT
7330	  && INTVAL (index) < 4096
7331	  && INTVAL (index) > -256);
7332}
7333
7334/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
7335static int
7336thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7337{
7338  int regno;
7339
7340  if (!REG_P (x))
7341    return 0;
7342
7343  regno = REGNO (x);
7344
7345  if (strict_p)
7346    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7347
7348  return (regno <= LAST_LO_REGNUM
7349	  || regno > LAST_VIRTUAL_REGISTER
7350	  || regno == FRAME_POINTER_REGNUM
7351	  || (GET_MODE_SIZE (mode) >= 4
7352	      && (regno == STACK_POINTER_REGNUM
7353		  || regno >= FIRST_PSEUDO_REGISTER
7354		  || x == hard_frame_pointer_rtx
7355		  || x == arg_pointer_rtx)));
7356}
7357
/* Return nonzero if X is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
7360inline static int
7361thumb1_index_register_rtx_p (rtx x, int strict_p)
7362{
7363  return thumb1_base_register_rtx_p (x, QImode, strict_p);
7364}
7365
/* Return nonzero if X is a legitimate 16-bit Thumb-state address.
7367
7368   The AP may be eliminated to either the SP or the FP, so we use the
7369   least common denominator, e.g. SImode, and offsets from 0 to 64.
7370
7371   ??? Verify whether the above is the right approach.
7372
7373   ??? Also, the FP may be eliminated to the SP, so perhaps that
7374   needs special handling also.
7375
7376   ??? Look at how the mips16 port solves this problem.  It probably uses
7377   better ways to solve some of these problems.
7378
7379   Although it is not incorrect, we don't accept QImode and HImode
7380   addresses based on the frame pointer or arg pointer until the
7381   reload pass starts.  This is so that eliminating such addresses
7382   into stack based ones won't produce impossible code.  */
7383int
7384thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7385{
7386  /* ??? Not clear if this is right.  Experiment.  */
7387  if (GET_MODE_SIZE (mode) < 4
7388      && !(reload_in_progress || reload_completed)
7389      && (reg_mentioned_p (frame_pointer_rtx, x)
7390	  || reg_mentioned_p (arg_pointer_rtx, x)
7391	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
7392	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7393	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7394	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7395    return 0;
7396
7397  /* Accept any base register.  SP only in SImode or larger.  */
7398  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7399    return 1;
7400
7401  /* This is PC relative data before arm_reorg runs.  */
7402  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7403	   && GET_CODE (x) == SYMBOL_REF
7404           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7405    return 1;
7406
7407  /* This is PC relative data after arm_reorg runs.  */
7408  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7409	   && reload_completed
7410	   && (GET_CODE (x) == LABEL_REF
7411	       || (GET_CODE (x) == CONST
7412		   && GET_CODE (XEXP (x, 0)) == PLUS
7413		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7414		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7415    return 1;
7416
7417  /* Post-inc indexing only supported for SImode and larger.  */
7418  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7419	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7420    return 1;
7421
7422  else if (GET_CODE (x) == PLUS)
7423    {
7424      /* REG+REG address can be any two index registers.  */
7425      /* We disallow FRAME+REG addressing since we know that FRAME
7426	 will be replaced with STACK, and SP relative addressing only
7427	 permits SP+OFFSET.  */
7428      if (GET_MODE_SIZE (mode) <= 4
7429	  && XEXP (x, 0) != frame_pointer_rtx
7430	  && XEXP (x, 1) != frame_pointer_rtx
7431	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7432	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7433	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7434	return 1;
7435
7436      /* REG+const has 5-7 bit offset for non-SP registers.  */
7437      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7438		|| XEXP (x, 0) == arg_pointer_rtx)
7439	       && CONST_INT_P (XEXP (x, 1))
7440	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7441	return 1;
7442
7443      /* REG+const has 10-bit offset for SP, but only SImode and
7444	 larger is supported.  */
7445      /* ??? Should probably check for DI/DFmode overflow here
7446	 just like GO_IF_LEGITIMATE_OFFSET does.  */
7447      else if (REG_P (XEXP (x, 0))
7448	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7449	       && GET_MODE_SIZE (mode) >= 4
7450	       && CONST_INT_P (XEXP (x, 1))
7451	       && INTVAL (XEXP (x, 1)) >= 0
7452	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7453	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
7454	return 1;
7455
7456      else if (REG_P (XEXP (x, 0))
7457	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7458		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7459		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7460		       && REGNO (XEXP (x, 0))
7461			  <= LAST_VIRTUAL_POINTER_REGISTER))
7462	       && GET_MODE_SIZE (mode) >= 4
7463	       && CONST_INT_P (XEXP (x, 1))
7464	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
7465	return 1;
7466    }
7467
7468  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7469	   && GET_MODE_SIZE (mode) == 4
7470	   && GET_CODE (x) == SYMBOL_REF
7471	   && CONSTANT_POOL_ADDRESS_P (x)
7472	   && ! (flag_pic
7473		 && symbol_mentioned_p (get_pool_constant (x))
7474		 && ! pcrel_constant_p (get_pool_constant (x))))
7475    return 1;
7476
7477  return 0;
7478}
7479
7480/* Return nonzero if VAL can be used as an offset in a Thumb-state address
7481   instruction of mode MODE.  */
7482int
7483thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7484{
7485  switch (GET_MODE_SIZE (mode))
7486    {
7487    case 1:
7488      return val >= 0 && val < 32;
7489
7490    case 2:
7491      return val >= 0 && val < 64 && (val & 1) == 0;
7492
7493    default:
7494      return (val >= 0
7495	      && (val + GET_MODE_SIZE (mode)) <= 128
7496	      && (val & 3) == 0);
7497    }
7498}
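
/* For illustration (not from the original sources): the checks above mirror
   the Thumb-1 5-bit offset field, which is scaled by the access size, so
   valid offsets are 0..31 for byte accesses, 0, 2, ..., 62 for halfword
   accesses and 0, 4, ..., 124 for word accesses (larger modes must
   additionally fit within the 128-byte window).  */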
7499
7500bool
7501arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7502{
7503  if (TARGET_ARM)
7504    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7505  else if (TARGET_THUMB2)
7506    return thumb2_legitimate_address_p (mode, x, strict_p);
7507  else /* if (TARGET_THUMB1) */
7508    return thumb1_legitimate_address_p (mode, x, strict_p);
7509}
7510
7511/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7512
7513   Given an rtx X being reloaded into a reg required to be
7514   in class CLASS, return the class of reg to actually use.
7515   In general this is just CLASS, but for the Thumb core registers and
7516   immediate constants we prefer a LO_REGS class or a subset.  */
7517
7518static reg_class_t
7519arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7520{
7521  if (TARGET_32BIT)
7522    return rclass;
7523  else
7524    {
7525      if (rclass == GENERAL_REGS)
7526	return LO_REGS;
7527      else
7528	return rclass;
7529    }
7530}
7531
7532/* Build the SYMBOL_REF for __tls_get_addr.  */
7533
7534static GTY(()) rtx tls_get_addr_libfunc;
7535
7536static rtx
7537get_tls_get_addr (void)
7538{
7539  if (!tls_get_addr_libfunc)
7540    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7541  return tls_get_addr_libfunc;
7542}
7543
7544rtx
7545arm_load_tp (rtx target)
7546{
7547  if (!target)
7548    target = gen_reg_rtx (SImode);
7549
7550  if (TARGET_HARD_TP)
7551    {
7552      /* Can return in any reg.  */
7553      emit_insn (gen_load_tp_hard (target));
7554    }
7555  else
7556    {
7557      /* Always returned in r0.  Immediately copy the result into a pseudo,
7558	 otherwise other uses of r0 (e.g. setting up function arguments) may
7559	 clobber the value.  */
7560
7561      rtx tmp;
7562
7563      emit_insn (gen_load_tp_soft ());
7564
7565      tmp = gen_rtx_REG (SImode, R0_REGNUM);
7566      emit_move_insn (target, tmp);
7567    }
7568  return target;
7569}
7570
7571static rtx
7572load_tls_operand (rtx x, rtx reg)
7573{
7574  rtx tmp;
7575
7576  if (reg == NULL_RTX)
7577    reg = gen_reg_rtx (SImode);
7578
7579  tmp = gen_rtx_CONST (SImode, x);
7580
7581  emit_move_insn (reg, tmp);
7582
7583  return reg;
7584}
7585
7586static rtx
7587arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7588{
7589  rtx insns, label, labelno, sum;
7590
7591  gcc_assert (reloc != TLS_DESCSEQ);
7592  start_sequence ();
7593
7594  labelno = GEN_INT (pic_labelno++);
7595  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7596  label = gen_rtx_CONST (VOIDmode, label);
7597
7598  sum = gen_rtx_UNSPEC (Pmode,
7599			gen_rtvec (4, x, GEN_INT (reloc), label,
7600				   GEN_INT (TARGET_ARM ? 8 : 4)),
7601			UNSPEC_TLS);
7602  reg = load_tls_operand (sum, reg);
7603
7604  if (TARGET_ARM)
7605    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7606  else
7607    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7608
7609  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7610				     LCT_PURE, /* LCT_CONST?  */
7611				     Pmode, 1, reg, Pmode);
7612
7613  insns = get_insns ();
7614  end_sequence ();
7615
7616  return insns;
7617}
7618
7619static rtx
7620arm_tls_descseq_addr (rtx x, rtx reg)
7621{
7622  rtx labelno = GEN_INT (pic_labelno++);
7623  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7624  rtx sum = gen_rtx_UNSPEC (Pmode,
7625			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7626				       gen_rtx_CONST (VOIDmode, label),
7627				       GEN_INT (!TARGET_ARM)),
7628			    UNSPEC_TLS);
7629  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7630
7631  emit_insn (gen_tlscall (x, labelno));
7632  if (!reg)
7633    reg = gen_reg_rtx (SImode);
7634  else
7635    gcc_assert (REGNO (reg) != R0_REGNUM);
7636
7637  emit_move_insn (reg, reg0);
7638
7639  return reg;
7640}
7641
7642rtx
7643legitimize_tls_address (rtx x, rtx reg)
7644{
7645  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7646  unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7647
7648  switch (model)
7649    {
7650    case TLS_MODEL_GLOBAL_DYNAMIC:
7651      if (TARGET_GNU2_TLS)
7652	{
7653	  reg = arm_tls_descseq_addr (x, reg);
7654
7655	  tp = arm_load_tp (NULL_RTX);
7656
7657	  dest = gen_rtx_PLUS (Pmode, tp, reg);
7658	}
7659      else
7660	{
7661	  /* Original scheme */
7662	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7663	  dest = gen_reg_rtx (Pmode);
7664	  emit_libcall_block (insns, dest, ret, x);
7665	}
7666      return dest;
7667
7668    case TLS_MODEL_LOCAL_DYNAMIC:
7669      if (TARGET_GNU2_TLS)
7670	{
7671	  reg = arm_tls_descseq_addr (x, reg);
7672
7673	  tp = arm_load_tp (NULL_RTX);
7674
7675	  dest = gen_rtx_PLUS (Pmode, tp, reg);
7676	}
7677      else
7678	{
7679	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7680
7681	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7682	     share the LDM result with other LD model accesses.  */
7683	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7684				UNSPEC_TLS);
7685	  dest = gen_reg_rtx (Pmode);
7686	  emit_libcall_block (insns, dest, ret, eqv);
7687
7688	  /* Load the addend.  */
7689	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7690						     GEN_INT (TLS_LDO32)),
7691				   UNSPEC_TLS);
7692	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7693	  dest = gen_rtx_PLUS (Pmode, dest, addend);
7694	}
7695      return dest;
7696
7697    case TLS_MODEL_INITIAL_EXEC:
7698      labelno = GEN_INT (pic_labelno++);
7699      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7700      label = gen_rtx_CONST (VOIDmode, label);
7701      sum = gen_rtx_UNSPEC (Pmode,
7702			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7703				       GEN_INT (TARGET_ARM ? 8 : 4)),
7704			    UNSPEC_TLS);
7705      reg = load_tls_operand (sum, reg);
7706
7707      if (TARGET_ARM)
7708	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7709      else if (TARGET_THUMB2)
7710	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7711      else
7712	{
7713	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7714	  emit_move_insn (reg, gen_const_mem (SImode, reg));
7715	}
7716
7717      tp = arm_load_tp (NULL_RTX);
7718
7719      return gen_rtx_PLUS (Pmode, tp, reg);
7720
7721    case TLS_MODEL_LOCAL_EXEC:
7722      tp = arm_load_tp (NULL_RTX);
7723
7724      reg = gen_rtx_UNSPEC (Pmode,
7725			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7726			    UNSPEC_TLS);
7727      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7728
7729      return gen_rtx_PLUS (Pmode, tp, reg);
7730
7731    default:
7732      abort ();
7733    }
7734}
7735
7736/* Try machine-dependent ways of modifying an illegitimate address
7737   to be legitimate.  If we find one, return the new, valid address.  */
7738rtx
7739arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7740{
7741  if (arm_tls_referenced_p (x))
7742    {
7743      rtx addend = NULL;
7744
7745      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7746	{
7747	  addend = XEXP (XEXP (x, 0), 1);
7748	  x = XEXP (XEXP (x, 0), 0);
7749	}
7750
7751      if (GET_CODE (x) != SYMBOL_REF)
7752	return x;
7753
7754      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7755
7756      x = legitimize_tls_address (x, NULL_RTX);
7757
7758      if (addend)
7759	{
7760	  x = gen_rtx_PLUS (SImode, x, addend);
7761	  orig_x = x;
7762	}
7763      else
7764	return x;
7765    }
7766
7767  if (!TARGET_ARM)
7768    {
7769      /* TODO: legitimize_address for Thumb2.  */
7770      if (TARGET_THUMB2)
7771        return x;
7772      return thumb_legitimize_address (x, orig_x, mode);
7773    }
7774
7775  if (GET_CODE (x) == PLUS)
7776    {
7777      rtx xop0 = XEXP (x, 0);
7778      rtx xop1 = XEXP (x, 1);
7779
7780      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7781	xop0 = force_reg (SImode, xop0);
7782
7783      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7784	  && !symbol_mentioned_p (xop1))
7785	xop1 = force_reg (SImode, xop1);
7786
7787      if (ARM_BASE_REGISTER_RTX_P (xop0)
7788	  && CONST_INT_P (xop1))
7789	{
7790	  HOST_WIDE_INT n, low_n;
7791	  rtx base_reg, val;
7792	  n = INTVAL (xop1);
7793
7794	  /* VFP addressing modes actually allow greater offsets, but for
7795	     now we just stick with the lowest common denominator.  */
7796	  if (mode == DImode
7797	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7798	    {
7799	      low_n = n & 0x0f;
7800	      n &= ~0x0f;
7801	      if (low_n > 4)
7802		{
7803		  n += 16;
7804		  low_n -= 16;
7805		}
7806	    }
7807	  else
7808	    {
7809	      low_n = ((mode) == TImode ? 0
7810		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7811	      n -= low_n;
7812	    }
7813
7814	  base_reg = gen_reg_rtx (SImode);
7815	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7816	  emit_move_insn (base_reg, val);
7817	  x = plus_constant (Pmode, base_reg, low_n);
7818	}
7819      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7820	x = gen_rtx_PLUS (SImode, xop0, xop1);
7821    }
7822
7823  /* XXX We don't allow MINUS any more -- see comment in
7824     arm_legitimate_address_outer_p ().  */
7825  else if (GET_CODE (x) == MINUS)
7826    {
7827      rtx xop0 = XEXP (x, 0);
7828      rtx xop1 = XEXP (x, 1);
7829
7830      if (CONSTANT_P (xop0))
7831	xop0 = force_reg (SImode, xop0);
7832
7833      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7834	xop1 = force_reg (SImode, xop1);
7835
7836      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7837	x = gen_rtx_MINUS (SImode, xop0, xop1);
7838    }
7839
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to be
     factorized for multiple adjacent memory references, and might even
     allow the minipool to be avoided entirely.  */
7844  else if (CONST_INT_P (x) && optimize > 0)
7845    {
7846      unsigned int bits;
7847      HOST_WIDE_INT mask, base, index;
7848      rtx base_reg;
7849
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only,
	 and hope that arm_gen_constant will enable ldrb to use more bits.  */
7853      bits = (mode == SImode) ? 12 : 8;
7854      mask = (1 << bits) - 1;
7855      base = INTVAL (x) & ~mask;
7856      index = INTVAL (x) & mask;
7857      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7858        {
7859	  /* It'll most probably be more efficient to generate the base
7860	     with more bits set and use a negative index instead. */
7861	  base |= mask;
7862	  index -= mask;
7863	}
7864      base_reg = force_reg (SImode, GEN_INT (base));
7865      x = plus_constant (Pmode, base_reg, index);
7866    }
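
  /* A worked example of the split above (illustrative, not from the original
     sources): for an SImode access at constant address 0x7ffff123 we get
     mask = 0xfff, base = 0x7ffff000 and index = 0x123.  Since base has 19
     bits set (more than (32 - 12)/2 = 10), base is widened to 0x7fffffff,
     which arm_gen_constant can build cheaply as ~0x80000000, and index
     becomes 0x123 - 0xfff = -0xedc, still within the 12-bit ldr range.  */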
7867
7868  if (flag_pic)
7869    {
7870      /* We need to find and carefully transform any SYMBOL and LABEL
7871	 references; so go back to the original address expression.  */
7872      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7873
7874      if (new_x != orig_x)
7875	x = new_x;
7876    }
7877
7878  return x;
7879}
7880
7881
7882/* Try machine-dependent ways of modifying an illegitimate Thumb address
7883   to be legitimate.  If we find one, return the new, valid address.  */
7884rtx
7885thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7886{
7887  if (GET_CODE (x) == PLUS
7888      && CONST_INT_P (XEXP (x, 1))
7889      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7890	  || INTVAL (XEXP (x, 1)) < 0))
7891    {
7892      rtx xop0 = XEXP (x, 0);
7893      rtx xop1 = XEXP (x, 1);
7894      HOST_WIDE_INT offset = INTVAL (xop1);
7895
7896      /* Try and fold the offset into a biasing of the base register and
7897	 then offsetting that.  Don't do this when optimizing for space
7898	 since it can cause too many CSEs.  */
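
      /* A worked example (illustrative, not from the original sources):
	 for an SImode access at base + 260 when optimizing for size,
	 delta = 260 - (256 - 4) = 8, so the base is biased by 252 and the
	 remaining offset of 8 fits the normal Thumb word-offset range.  */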
7899      if (optimize_size && offset >= 0
7900	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
7901	{
7902	  HOST_WIDE_INT delta;
7903
7904	  if (offset >= 256)
7905	    delta = offset - (256 - GET_MODE_SIZE (mode));
7906	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7907	    delta = 31 * GET_MODE_SIZE (mode);
7908	  else
7909	    delta = offset & (~31 * GET_MODE_SIZE (mode));
7910
7911	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7912				NULL_RTX);
7913	  x = plus_constant (Pmode, xop0, delta);
7914	}
7915      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
7919	x = force_operand (x, NULL_RTX);
7920      else
7921	{
7922	  /* For the remaining cases, force the constant into a register.  */
7923	  xop1 = force_reg (SImode, xop1);
7924	  x = gen_rtx_PLUS (SImode, xop0, xop1);
7925	}
7926    }
7927  else if (GET_CODE (x) == PLUS
7928	   && s_register_operand (XEXP (x, 1), SImode)
7929	   && !s_register_operand (XEXP (x, 0), SImode))
7930    {
7931      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7932
7933      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7934    }
7935
7936  if (flag_pic)
7937    {
7938      /* We need to find and carefully transform any SYMBOL and LABEL
7939	 references; so go back to the original address expression.  */
7940      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7941
7942      if (new_x != orig_x)
7943	x = new_x;
7944    }
7945
7946  return x;
7947}
7948
7949bool
7950arm_legitimize_reload_address (rtx *p,
7951			       machine_mode mode,
7952			       int opnum, int type,
7953			       int ind_levels ATTRIBUTE_UNUSED)
7954{
7955  /* We must recognize output that we have already generated ourselves.  */
7956  if (GET_CODE (*p) == PLUS
7957      && GET_CODE (XEXP (*p, 0)) == PLUS
7958      && REG_P (XEXP (XEXP (*p, 0), 0))
7959      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7960      && CONST_INT_P (XEXP (*p, 1)))
7961    {
7962      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7963		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7964		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
7965      return true;
7966    }
7967
7968  if (GET_CODE (*p) == PLUS
7969      && REG_P (XEXP (*p, 0))
7970      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7971      /* If the base register is equivalent to a constant, let the generic
7972	 code handle it.  Otherwise we will run into problems if a future
7973	 reload pass decides to rematerialize the constant.  */
7974      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7975      && CONST_INT_P (XEXP (*p, 1)))
7976    {
7977      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7978      HOST_WIDE_INT low, high;
7979
7980      /* Detect coprocessor load/stores.  */
7981      bool coproc_p = ((TARGET_HARD_FLOAT
7982			&& TARGET_VFP
7983			&& (mode == SFmode || mode == DFmode))
7984		       || (TARGET_REALLY_IWMMXT
7985			   && VALID_IWMMXT_REG_MODE (mode))
7986		       || (TARGET_NEON
7987			   && (VALID_NEON_DREG_MODE (mode)
7988			       || VALID_NEON_QREG_MODE (mode))));
7989
7990      /* For some conditions, bail out when lower two bits are unaligned.  */
7991      if ((val & 0x3) != 0
7992	  /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
7993	  && (coproc_p
7994	      /* For DI, and DF under soft-float: */
7995	      || ((mode == DImode || mode == DFmode)
		  /* Without ldrd, we use stm/ldm, which does not
		     fare well with unaligned bits.  */
7998		  && (! TARGET_LDRD
7999		      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
8000		      || TARGET_THUMB2))))
8001	return false;
8002
8003      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
8004	 of which the (reg+high) gets turned into a reload add insn,
8005	 we try to decompose the index into high/low values that can often
8006	 also lead to better reload CSE.
8007	 For example:
8008	         ldr r0, [r2, #4100]  // Offset too large
8009		 ldr r1, [r2, #4104]  // Offset too large
8010
8011	 is best reloaded as:
8012	         add t1, r2, #4096
8013		 ldr r0, [t1, #4]
8014		 add t2, r2, #4096
8015		 ldr r1, [t2, #8]
8016
8017	 which post-reload CSE can simplify in most cases to eliminate the
8018	 second add instruction:
8019	         add t1, r2, #4096
8020		 ldr r0, [t1, #4]
8021		 ldr r1, [t1, #8]
8022
8023	 The idea here is that we want to split out the bits of the constant
8024	 as a mask, rather than as subtracting the maximum offset that the
8025	 respective type of load/store used can handle.
8026
	 A negative low part can still be useful even when the overall offset
	 is positive; sometimes it leads to a high-part immediate that can be
	 constructed with fewer instructions.
8030	 For example:
8031	         ldr r0, [r2, #0x3FFFFC]
8032
8033	 This is best reloaded as:
8034	         add t1, r2, #0x400000
8035		 ldr r0, [t1, #-4]
8036
8037	 The trick for spotting this for a load insn with N bits of offset
	 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
8039	 negative offset that is going to make bit N and all the bits below
8040	 it become zero in the remainder part.
8041
8042	 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
8043	 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
8044	 used in most cases of ARM load/store instructions.  */
8045
8046#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
8047      (((VAL) & ((1 << (N)) - 1))					\
8048       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
8049       : 0)
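
      /* A worked example (illustrative): for the 0x3FFFFC case above with
	 N = 12, VAL & 0x1fff = 0x1ffc, XORing with 0x1000 gives 0xffc, and
	 subtracting 0x1000 yields low = -4; the high part then becomes
	 0x3FFFFC - (-4) = 0x400000, matching the add/ldr split shown in the
	 comment above.  */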
8050
8051      if (coproc_p)
8052	{
8053	  low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
8054
	  /* NEON quad-word load/stores are made of two double-word accesses,
	     so the valid index range is reduced by 8.  Treat it as a 9-bit
	     range if we go over it.  */
8058	  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
8059	    low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
8060	}
8061      else if (GET_MODE_SIZE (mode) == 8)
8062	{
8063	  if (TARGET_LDRD)
8064	    low = (TARGET_THUMB2
8065		   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8066		   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8067	  else
8068	    /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8069	       to access doublewords. The supported load/store offsets are
8070	       -8, -4, and 4, which we try to produce here.  */
8071	    low = ((val & 0xf) ^ 0x8) - 0x8;
8072	}
8073      else if (GET_MODE_SIZE (mode) < 8)
8074	{
8075	  /* NEON element load/stores do not have an offset.  */
8076	  if (TARGET_NEON_FP16 && mode == HFmode)
8077	    return false;
8078
8079	  if (TARGET_THUMB2)
8080	    {
8081	      /* Thumb-2 has an asymmetrical index range of (-256,4096).
8082		 Try the wider 12-bit range first, and re-try if the result
8083		 is out of range.  */
8084	      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8085	      if (low < -255)
8086		low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8087	    }
8088	  else
8089	    {
8090	      if (mode == HImode || mode == HFmode)
8091		{
8092		  if (arm_arch4)
8093		    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8094		  else
8095		    {
8096		      /* The storehi/movhi_bytes fallbacks can use only
8097			 [-4094,+4094] of the full ldrb/strb index range.  */
8098		      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8099		      if (low == 4095 || low == -4095)
8100			return false;
8101		    }
8102		}
8103	      else
8104		low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8105	    }
8106	}
8107      else
8108	return false;
8109
8110      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8111	       ^ (unsigned HOST_WIDE_INT) 0x80000000)
8112	      - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero.  */
8114      if (low == 0 || high == 0 || (high + low != val))
8115	return false;
8116
8117      /* Reload the high part into a base reg; leave the low part
8118	 in the mem.
8119	 Note that replacing this gen_rtx_PLUS with plus_constant is
8120	 wrong in this case because we rely on the
8121	 (plus (plus reg c1) c2) structure being preserved so that
8122	 XEXP (*p, 0) in push_reload below uses the correct term.  */
8123      *p = gen_rtx_PLUS (GET_MODE (*p),
8124			 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8125				       GEN_INT (high)),
8126			 GEN_INT (low));
8127      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8128		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8129		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
8130      return true;
8131    }
8132
8133  return false;
8134}
8135
8136rtx
8137thumb_legitimize_reload_address (rtx *x_p,
8138				 machine_mode mode,
8139				 int opnum, int type,
8140				 int ind_levels ATTRIBUTE_UNUSED)
8141{
8142  rtx x = *x_p;
8143
8144  if (GET_CODE (x) == PLUS
8145      && GET_MODE_SIZE (mode) < 4
8146      && REG_P (XEXP (x, 0))
8147      && XEXP (x, 0) == stack_pointer_rtx
8148      && CONST_INT_P (XEXP (x, 1))
8149      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8150    {
8151      rtx orig_x = x;
8152
8153      x = copy_rtx (x);
8154      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8155		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8156      return x;
8157    }
8158
8159  /* If both registers are hi-regs, then it's better to reload the
8160     entire expression rather than each register individually.  That
8161     only requires one reload register rather than two.  */
8162  if (GET_CODE (x) == PLUS
8163      && REG_P (XEXP (x, 0))
8164      && REG_P (XEXP (x, 1))
8165      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8166      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8167    {
8168      rtx orig_x = x;
8169
8170      x = copy_rtx (x);
8171      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8172		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8173      return x;
8174    }
8175
8176  return NULL;
8177}
8178
8179/* Return TRUE if X contains any TLS symbol references.  */
8180
8181bool
8182arm_tls_referenced_p (rtx x)
8183{
8184  if (! TARGET_HAVE_TLS)
8185    return false;
8186
8187  subrtx_iterator::array_type array;
8188  FOR_EACH_SUBRTX (iter, array, x, ALL)
8189    {
8190      const_rtx x = *iter;
8191      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8192	return true;
8193
8194      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8195	 TLS offsets, not real symbol references.  */
8196      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8197	iter.skip_subrtxes ();
8198    }
8199  return false;
8200}
8201
8202/* Implement TARGET_LEGITIMATE_CONSTANT_P.
8203
8204   On the ARM, allow any integer (invalid ones are removed later by insn
8205   patterns), nice doubles, and symbol_refs that refer to the function's
8206   constant pool.
8207
8208   When generating PIC, allow anything.  */
8209
8210static bool
8211arm_legitimate_constant_p_1 (machine_mode, rtx x)
8212{
8213  return flag_pic || !label_mentioned_p (x);
8214}
8215
8216static bool
8217thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8218{
8219  return (CONST_INT_P (x)
8220	  || CONST_DOUBLE_P (x)
8221	  || CONSTANT_ADDRESS_P (x)
8222	  || flag_pic);
8223}
8224
8225static bool
8226arm_legitimate_constant_p (machine_mode mode, rtx x)
8227{
8228  return (!arm_cannot_force_const_mem (mode, x)
8229	  && (TARGET_32BIT
8230	      ? arm_legitimate_constant_p_1 (mode, x)
8231	      : thumb_legitimate_constant_p (mode, x)));
8232}
8233
8234/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
8235
8236static bool
8237arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8238{
8239  rtx base, offset;
8240
8241  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8242    {
8243      split_const (x, &base, &offset);
8244      if (GET_CODE (base) == SYMBOL_REF
8245	  && !offset_within_block_p (base, INTVAL (offset)))
8246	return true;
8247    }
8248  return arm_tls_referenced_p (x);
8249}
8250
8251#define REG_OR_SUBREG_REG(X)						\
8252  (REG_P (X)							\
8253   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8254
8255#define REG_OR_SUBREG_RTX(X)			\
8256   (REG_P (X) ? (X) : SUBREG_REG (X))
8257
8258static inline int
8259thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8260{
8261  machine_mode mode = GET_MODE (x);
8262  int total, words;
8263
8264  switch (code)
8265    {
8266    case ASHIFT:
8267    case ASHIFTRT:
8268    case LSHIFTRT:
8269    case ROTATERT:
8270      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8271
8272    case PLUS:
8273    case MINUS:
8274    case COMPARE:
8275    case NEG:
8276    case NOT:
8277      return COSTS_N_INSNS (1);
8278
8279    case MULT:
8280      if (CONST_INT_P (XEXP (x, 1)))
8281	{
8282	  int cycles = 0;
8283	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8284
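	  /* This presumably models a multiplier that retires two bits of the
	     constant per cycle; e.g. a 16-bit constant gives cycles == 8.  */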
8285	  while (i)
8286	    {
8287	      i >>= 2;
8288	      cycles++;
8289	    }
8290	  return COSTS_N_INSNS (2) + cycles;
8291	}
8292      return COSTS_N_INSNS (1) + 16;
8293
8294    case SET:
8295      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8296	 the mode.  */
8297      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8298      return (COSTS_N_INSNS (words)
8299	      + 4 * ((MEM_P (SET_SRC (x)))
8300		     + MEM_P (SET_DEST (x))));
8301
8302    case CONST_INT:
8303      if (outer == SET)
8304	{
8305	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8306	    return 0;
8307	  if (thumb_shiftable_const (INTVAL (x)))
8308	    return COSTS_N_INSNS (2);
8309	  return COSTS_N_INSNS (3);
8310	}
8311      else if ((outer == PLUS || outer == COMPARE)
8312	       && INTVAL (x) < 256 && INTVAL (x) > -256)
8313	return 0;
8314      else if ((outer == IOR || outer == XOR || outer == AND)
8315	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
8316	return COSTS_N_INSNS (1);
8317      else if (outer == AND)
8318	{
8319	  int i;
8320	  /* This duplicates the tests in the andsi3 expander.  */
8321	  for (i = 9; i <= 31; i++)
8322	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8323		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8324	      return COSTS_N_INSNS (2);
8325	}
8326      else if (outer == ASHIFT || outer == ASHIFTRT
8327	       || outer == LSHIFTRT)
8328	return 0;
8329      return COSTS_N_INSNS (2);
8330
8331    case CONST:
8332    case CONST_DOUBLE:
8333    case LABEL_REF:
8334    case SYMBOL_REF:
8335      return COSTS_N_INSNS (3);
8336
8337    case UDIV:
8338    case UMOD:
8339    case DIV:
8340    case MOD:
8341      return 100;
8342
8343    case TRUNCATE:
8344      return 99;
8345
8346    case AND:
8347    case XOR:
8348    case IOR:
8349      /* XXX guess.  */
8350      return 8;
8351
8352    case MEM:
8353      /* XXX another guess.  */
8354      /* Memory costs quite a lot for the first word, but subsequent words
8355	 load at the equivalent of a single insn each.  */
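      /* E.g. an SImode load comes out as 10 and a DImode load as 14, plus 4
	 more when the address is a constant-pool reference.  */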
8356      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8357	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8358		 ? 4 : 0));
8359
8360    case IF_THEN_ELSE:
8361      /* XXX a guess.  */
8362      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8363	return 14;
8364      return 2;
8365
8366    case SIGN_EXTEND:
8367    case ZERO_EXTEND:
8368      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8369      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8370
8371      if (mode == SImode)
8372	return total;
8373
8374      if (arm_arch6)
8375	return total + COSTS_N_INSNS (1);
8376
8377      /* Assume a two-shift sequence.  Increase the cost slightly so
8378	 we prefer actual shifts over an extend operation.  */
8379      return total + 1 + COSTS_N_INSNS (2);
8380
8381    default:
8382      return 99;
8383    }
8384}
8385
8386static inline bool
8387arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8388{
8389  machine_mode mode = GET_MODE (x);
8390  enum rtx_code subcode;
8391  rtx operand;
8392  enum rtx_code code = GET_CODE (x);
8393  *total = 0;
8394
8395  switch (code)
8396    {
8397    case MEM:
8398      /* Memory costs quite a lot for the first word, but subsequent words
8399	 load at the equivalent of a single insn each.  */
8400      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8401      return true;
8402
8403    case DIV:
8404    case MOD:
8405    case UDIV:
8406    case UMOD:
8407      if (TARGET_HARD_FLOAT && mode == SFmode)
8408	*total = COSTS_N_INSNS (2);
8409      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8410	*total = COSTS_N_INSNS (4);
8411      else
8412	*total = COSTS_N_INSNS (20);
8413      return false;
8414
8415    case ROTATE:
8416      if (REG_P (XEXP (x, 1)))
8417	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8418      else if (!CONST_INT_P (XEXP (x, 1)))
8419	*total = rtx_cost (XEXP (x, 1), code, 1, speed);
8420
8421      /* Fall through */
8422    case ROTATERT:
8423      if (mode != SImode)
8424	{
8425	  *total += COSTS_N_INSNS (4);
8426	  return true;
8427	}
8428
8429      /* Fall through */
8430    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8431      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8432      if (mode == DImode)
8433	{
8434	  *total += COSTS_N_INSNS (3);
8435	  return true;
8436	}
8437
8438      *total += COSTS_N_INSNS (1);
8439      /* Increase the cost of complex shifts because they aren't any faster
8440         and they reduce dual-issue opportunities.  */
8441      if (arm_tune_cortex_a9
8442	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8443	++*total;
8444
8445      return true;
8446
8447    case MINUS:
8448      if (mode == DImode)
8449	{
8450	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8451	  if (CONST_INT_P (XEXP (x, 0))
8452	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8453	    {
8454	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8455	      return true;
8456	    }
8457
8458	  if (CONST_INT_P (XEXP (x, 1))
8459	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8460	    {
8461	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8462	      return true;
8463	    }
8464
8465	  return false;
8466	}
8467
8468      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8469	{
8470	  if (TARGET_HARD_FLOAT
8471	      && (mode == SFmode
8472		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8473	    {
8474	      *total = COSTS_N_INSNS (1);
8475	      if (CONST_DOUBLE_P (XEXP (x, 0))
8476		  && arm_const_double_rtx (XEXP (x, 0)))
8477		{
8478		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8479		  return true;
8480		}
8481
8482	      if (CONST_DOUBLE_P (XEXP (x, 1))
8483		  && arm_const_double_rtx (XEXP (x, 1)))
8484		{
8485		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8486		  return true;
8487		}
8488
8489	      return false;
8490	    }
8491	  *total = COSTS_N_INSNS (20);
8492	  return false;
8493	}
8494
8495      *total = COSTS_N_INSNS (1);
8496      if (CONST_INT_P (XEXP (x, 0))
8497	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8498	{
8499	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8500	  return true;
8501	}
8502
8503      subcode = GET_CODE (XEXP (x, 1));
8504      if (subcode == ASHIFT || subcode == ASHIFTRT
8505	  || subcode == LSHIFTRT
8506	  || subcode == ROTATE || subcode == ROTATERT)
8507	{
8508	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8509	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8510	  return true;
8511	}
8512
8513      /* A shift as a part of RSB costs no more than RSB itself.  */
8514      if (GET_CODE (XEXP (x, 0)) == MULT
8515	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8516	{
8517	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8518	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8519	  return true;
8520	}
8521
8522      if (subcode == MULT
8523	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8524	{
8525	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8526	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8527	  return true;
8528	}
8529
8530      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8531	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8532	{
8533	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8534	  if (REG_P (XEXP (XEXP (x, 1), 0))
8535	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8536	    *total += COSTS_N_INSNS (1);
8537
8538	  return true;
8539	}
8540
8541      /* Fall through */
8542
8543    case PLUS:
8544      if (code == PLUS && arm_arch6 && mode == SImode
8545	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8546	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8547	{
8548	  *total = COSTS_N_INSNS (1);
8549	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8550			      0, speed);
8551	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8552	  return true;
8553	}
8554
8555      /* MLA: All arguments must be registers.  We filter out
8556	 multiplication by a power of two, so that we fall through to
8557	 the code below.  */
8558      if (GET_CODE (XEXP (x, 0)) == MULT
8559	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8560	{
8561	  /* The cost comes from the cost of the multiply.  */
8562	  return false;
8563	}
8564
8565      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8566	{
8567	  if (TARGET_HARD_FLOAT
8568	      && (mode == SFmode
8569		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8570	    {
8571	      *total = COSTS_N_INSNS (1);
8572	      if (CONST_DOUBLE_P (XEXP (x, 1))
8573		  && arm_const_double_rtx (XEXP (x, 1)))
8574		{
8575		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8576		  return true;
8577		}
8578
8579	      return false;
8580	    }
8581
8582	  *total = COSTS_N_INSNS (20);
8583	  return false;
8584	}
8585
8586      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8587	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8588	{
8589	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8590	  if (REG_P (XEXP (XEXP (x, 0), 0))
8591	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8592	    *total += COSTS_N_INSNS (1);
8593	  return true;
8594	}
8595
8596      /* Fall through */
8597
8598    case AND: case XOR: case IOR:
8599
8600      /* Normally the frame registers will be split into reg+const during
8601	 reload, so it is a bad idea to combine them with other instructions,
8602	 since then they might not be moved outside of loops.  As a compromise
8603	 we allow integration with ops that have a constant as their second
8604	 operand.  */
8605      if (REG_OR_SUBREG_REG (XEXP (x, 0))
8606	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8607	  && !CONST_INT_P (XEXP (x, 1)))
8608	*total = COSTS_N_INSNS (1);
8609
8610      if (mode == DImode)
8611	{
8612	  *total += COSTS_N_INSNS (2);
8613	  if (CONST_INT_P (XEXP (x, 1))
8614	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8615	    {
8616	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8617	      return true;
8618	    }
8619
8620	  return false;
8621	}
8622
8623      *total += COSTS_N_INSNS (1);
8624      if (CONST_INT_P (XEXP (x, 1))
8625	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8626	{
8627	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8628	  return true;
8629	}
8630      subcode = GET_CODE (XEXP (x, 0));
8631      if (subcode == ASHIFT || subcode == ASHIFTRT
8632	  || subcode == LSHIFTRT
8633	  || subcode == ROTATE || subcode == ROTATERT)
8634	{
8635	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8636	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8637	  return true;
8638	}
8639
8640      if (subcode == MULT
8641	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8642	{
8643	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8644	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8645	  return true;
8646	}
8647
8648      if (subcode == UMIN || subcode == UMAX
8649	  || subcode == SMIN || subcode == SMAX)
8650	{
8651	  *total = COSTS_N_INSNS (3);
8652	  return true;
8653	}
8654
8655      return false;
8656
8657    case MULT:
8658      /* This should have been handled by the CPU specific routines.  */
8659      gcc_unreachable ();
8660
8661    case TRUNCATE:
8662      if (arm_arch3m && mode == SImode
8663	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8664	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8665	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8666	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8667	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8668	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8669	{
8670	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8671	  return true;
8672	}
8673      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8674      return false;
8675
8676    case NEG:
8677      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8678	{
8679	  if (TARGET_HARD_FLOAT
8680	      && (mode == SFmode
8681		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8682	    {
8683	      *total = COSTS_N_INSNS (1);
8684	      return false;
8685	    }
8686	  *total = COSTS_N_INSNS (2);
8687	  return false;
8688	}
8689
8690      /* Fall through */
8691    case NOT:
8692      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8693      if (mode == SImode && code == NOT)
8694	{
8695	  subcode = GET_CODE (XEXP (x, 0));
8696	  if (subcode == ASHIFT || subcode == ASHIFTRT
8697	      || subcode == LSHIFTRT
8698	      || subcode == ROTATE || subcode == ROTATERT
8699	      || (subcode == MULT
8700		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8701	    {
8702	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8703	      /* Register shifts cost an extra cycle.  */
8704	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8705		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8706							subcode, 1, speed);
8707	      return true;
8708	    }
8709	}
8710
8711      return false;
8712
8713    case IF_THEN_ELSE:
8714      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8715	{
8716	  *total = COSTS_N_INSNS (4);
8717	  return true;
8718	}
8719
8720      operand = XEXP (x, 0);
8721
8722      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8723	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8724	    && REG_P (XEXP (operand, 0))
8725	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8726	*total += COSTS_N_INSNS (1);
8727      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8728		 + rtx_cost (XEXP (x, 2), code, 2, speed));
8729      return true;
8730
8731    case NE:
8732      if (mode == SImode && XEXP (x, 1) == const0_rtx)
8733	{
8734	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8735	  return true;
8736	}
8737      goto scc_insn;
8738
8739    case GE:
8740      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8741	  && mode == SImode && XEXP (x, 1) == const0_rtx)
8742	{
8743	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8744	  return true;
8745	}
8746      goto scc_insn;
8747
8748    case LT:
8749      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8750	  && mode == SImode && XEXP (x, 1) == const0_rtx)
8751	{
8752	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8753	  return true;
8754	}
8755      goto scc_insn;
8756
8757    case EQ:
8758    case GT:
8759    case LE:
8760    case GEU:
8761    case LTU:
8762    case GTU:
8763    case LEU:
8764    case UNORDERED:
8765    case ORDERED:
8766    case UNEQ:
8767    case UNGE:
8768    case UNLT:
8769    case UNGT:
8770    case UNLE:
8771    scc_insn:
8772      /* SCC insns.  If the comparison has already been performed, they
8773	 cost 2 instructions.  Otherwise they need an additional comparison
8774	 before them.  */
8775      *total = COSTS_N_INSNS (2);
8776      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8777	{
8778	  return true;
8779	}
8780
8781      /* Fall through */
8782    case COMPARE:
8783      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8784	{
8785	  *total = 0;
8786	  return true;
8787	}
8788
8789      *total += COSTS_N_INSNS (1);
8790      if (CONST_INT_P (XEXP (x, 1))
8791	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8792	{
8793	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8794	  return true;
8795	}
8796
8797      subcode = GET_CODE (XEXP (x, 0));
8798      if (subcode == ASHIFT || subcode == ASHIFTRT
8799	  || subcode == LSHIFTRT
8800	  || subcode == ROTATE || subcode == ROTATERT)
8801	{
8802	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8803	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8804	  return true;
8805	}
8806
8807      if (subcode == MULT
8808	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8809	{
8810	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8811	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8812	  return true;
8813	}
8814
8815      return false;
8816
8817    case UMIN:
8818    case UMAX:
8819    case SMIN:
8820    case SMAX:
8821      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8822      if (!CONST_INT_P (XEXP (x, 1))
8823	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8824	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
8825      return true;
8826
8827    case ABS:
8828      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8829	{
8830	  if (TARGET_HARD_FLOAT
8831	      && (mode == SFmode
8832		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8833	    {
8834	      *total = COSTS_N_INSNS (1);
8835	      return false;
8836	    }
8837	  *total = COSTS_N_INSNS (20);
8838	  return false;
8839	}
8840      *total = COSTS_N_INSNS (1);
8841      if (mode == DImode)
8842	*total += COSTS_N_INSNS (3);
8843      return false;
8844
8845    case SIGN_EXTEND:
8846    case ZERO_EXTEND:
8847      *total = 0;
8848      if (GET_MODE_CLASS (mode) == MODE_INT)
8849	{
8850	  rtx op = XEXP (x, 0);
8851	  machine_mode opmode = GET_MODE (op);
8852
8853	  if (mode == DImode)
8854	    *total += COSTS_N_INSNS (1);
8855
8856	  if (opmode != SImode)
8857	    {
8858	      if (MEM_P (op))
8859		{
8860		  /* If !arm_arch4, we use one of the extendhisi2_mem
8861		     or movhi_bytes patterns for HImode.  For a QImode
8862		     sign extension, we first zero-extend from memory
8863		     and then perform a shift sequence.  */
8864		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8865		    *total += COSTS_N_INSNS (2);
8866		}
8867	      else if (arm_arch6)
8868		*total += COSTS_N_INSNS (1);
8869
8870	      /* We don't have the necessary insn, so we need to perform some
8871		 other operation.  */
8872	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8873		/* An and with constant 255.  */
8874		*total += COSTS_N_INSNS (1);
8875	      else
8876		/* A shift sequence.  Increase costs slightly to avoid
8877		   combining two shifts into an extend operation.  */
8878		*total += COSTS_N_INSNS (2) + 1;
8879	    }
8880
8881	  return false;
8882	}
8883
8884      switch (GET_MODE (XEXP (x, 0)))
8885	{
8886	case V8QImode:
8887	case V4HImode:
8888	case V2SImode:
8889	case V4QImode:
8890	case V2HImode:
8891	  *total = COSTS_N_INSNS (1);
8892	  return false;
8893
8894	default:
8895	  gcc_unreachable ();
8896	}
8897      gcc_unreachable ();
8898
8899    case ZERO_EXTRACT:
8900    case SIGN_EXTRACT:
8901      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8902      return true;
8903
8904    case CONST_INT:
8905      if (const_ok_for_arm (INTVAL (x))
8906	  || const_ok_for_arm (~INTVAL (x)))
8907	*total = COSTS_N_INSNS (1);
8908      else
8909	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8910						  INTVAL (x), NULL_RTX,
8911						  NULL_RTX, 0, 0));
8912      return true;
8913
8914    case CONST:
8915    case LABEL_REF:
8916    case SYMBOL_REF:
8917      *total = COSTS_N_INSNS (3);
8918      return true;
8919
8920    case HIGH:
8921      *total = COSTS_N_INSNS (1);
8922      return true;
8923
8924    case LO_SUM:
8925      *total = COSTS_N_INSNS (1);
8926      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8927      return true;
8928
8929    case CONST_DOUBLE:
8930      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8931	  && (mode == SFmode || !TARGET_VFP_SINGLE))
8932	*total = COSTS_N_INSNS (1);
8933      else
8934	*total = COSTS_N_INSNS (4);
8935      return true;
8936
8937    case SET:
8938      /* The vec_extract patterns accept memory operands that require an
8939	 address reload.  Account for the cost of that reload to give the
8940	 auto-inc-dec pass an incentive to try to replace them.  */
8941      if (TARGET_NEON && MEM_P (SET_DEST (x))
8942	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8943	{
8944	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
8945	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8946	    *total += COSTS_N_INSNS (1);
8947	  return true;
8948	}
8949      /* Likewise for the vec_set patterns.  */
8950      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8951	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8952	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8953	{
8954	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8955	  *total = rtx_cost (mem, code, 0, speed);
8956	  if (!neon_vector_mem_operand (mem, 2, true))
8957	    *total += COSTS_N_INSNS (1);
8958	  return true;
8959	}
8960      return false;
8961
8962    case UNSPEC:
8963      /* Cost this as much as a memory access so that it is considered
8964	 worth hoisting out of loops.  */
8965      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8966	{
8967	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8968	}
8969      return true;
8970
8971    case CONST_VECTOR:
8972      if (TARGET_NEON
8973	  && TARGET_HARD_FLOAT
8974	  && outer == SET
8975	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8976	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8977	*total = COSTS_N_INSNS (1);
8978      else
8979	*total = COSTS_N_INSNS (4);
8980      return true;
8981
8982    default:
8983      *total = COSTS_N_INSNS (4);
8984      return false;
8985    }
8986}
8987
8988/* Estimate the size cost of Thumb-1 instructions.
8989   For now most of the code is copied from thumb1_rtx_costs; we will need
8990   finer-grained tuning when we have more related test cases.  */
8991static inline int
8992thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8993{
8994  machine_mode mode = GET_MODE (x);
8995  int words;
8996
8997  switch (code)
8998    {
8999    case ASHIFT:
9000    case ASHIFTRT:
9001    case LSHIFTRT:
9002    case ROTATERT:
9003      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9004
9005    case PLUS:
9006    case MINUS:
9007      /* Thumb-1 needs two instructions to implement the
9008	 shiftadd/shiftsub0/shiftsub1 patterns generated by RTL expansion,
9009	 notably when expanding multiplication.  */
9010      if ((GET_CODE (XEXP (x, 0)) == MULT
9011	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9012	  || (GET_CODE (XEXP (x, 1)) == MULT
9013	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9014	return COSTS_N_INSNS (2);
9015      /* Fall through on purpose for ordinary PLUS/MINUS RTXs.  */
9016    case COMPARE:
9017    case NEG:
9018    case NOT:
9019      return COSTS_N_INSNS (1);
9020
9021    case MULT:
9022      if (CONST_INT_P (XEXP (x, 1)))
9023        {
9024          /* The Thumb-1 mul instruction cannot operate on a constant; we
9025             must load it into a register first.  */
9026          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9027	  /* For targets that have a very small and high-latency multiply
9028	     unit, we prefer to synthesize the mult with up to 5 instructions,
9029	     giving a good balance between size and performance.  */
9030	  if (arm_arch6m && arm_m_profile_small_mul)
9031	    return COSTS_N_INSNS (5);
9032	  else
9033	    return COSTS_N_INSNS (1) + const_size;
9034        }
9035      return COSTS_N_INSNS (1);
9036
9037    case SET:
9038      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9039	 the mode.  */
9040      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9041      return COSTS_N_INSNS (words)
9042	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9043				    || satisfies_constraint_K (SET_SRC (x))
9044				       /* thumb1_movdi_insn.  */
9045				    || ((words > 1) && MEM_P (SET_SRC (x))));
9046
9047    case CONST_INT:
9048      if (outer == SET)
9049        {
9050          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9051            return COSTS_N_INSNS (1);
9052	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9053	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9054            return COSTS_N_INSNS (2);
9055	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9056          if (thumb_shiftable_const (INTVAL (x)))
9057            return COSTS_N_INSNS (2);
9058          return COSTS_N_INSNS (3);
9059        }
9060      else if ((outer == PLUS || outer == COMPARE)
9061               && INTVAL (x) < 256 && INTVAL (x) > -256)
9062        return 0;
9063      else if ((outer == IOR || outer == XOR || outer == AND)
9064               && INTVAL (x) < 256 && INTVAL (x) >= -256)
9065        return COSTS_N_INSNS (1);
9066      else if (outer == AND)
9067        {
9068          int i;
9069          /* This duplicates the tests in the andsi3 expander.  */
9070          for (i = 9; i <= 31; i++)
9071            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9072                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9073              return COSTS_N_INSNS (2);
9074        }
9075      else if (outer == ASHIFT || outer == ASHIFTRT
9076               || outer == LSHIFTRT)
9077        return 0;
9078      return COSTS_N_INSNS (2);
9079
9080    case CONST:
9081    case CONST_DOUBLE:
9082    case LABEL_REF:
9083    case SYMBOL_REF:
9084      return COSTS_N_INSNS (3);
9085
9086    case UDIV:
9087    case UMOD:
9088    case DIV:
9089    case MOD:
9090      return 100;
9091
9092    case TRUNCATE:
9093      return 99;
9094
9095    case AND:
9096    case XOR:
9097    case IOR:
9098      return COSTS_N_INSNS (1);
9099
9100    case MEM:
9101      return (COSTS_N_INSNS (1)
9102	      + COSTS_N_INSNS (1)
9103		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9104              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9105                 ? COSTS_N_INSNS (1) : 0));
9106
9107    case IF_THEN_ELSE:
9108      /* XXX a guess.  */
9109      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9110        return 14;
9111      return 2;
9112
9113    case ZERO_EXTEND:
9114      /* XXX still guessing.  */
9115      switch (GET_MODE (XEXP (x, 0)))
9116        {
9117          case QImode:
9118            return (1 + (mode == DImode ? 4 : 0)
9119                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9120
9121          case HImode:
9122            return (4 + (mode == DImode ? 4 : 0)
9123                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9124
9125          case SImode:
9126            return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9127
9128          default:
9129            return 99;
9130        }
9131
9132    default:
9133      return 99;
9134    }
9135}
9136
9137/* RTX costs when optimizing for size.  */
9138static bool
9139arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9140		    int *total)
9141{
9142  machine_mode mode = GET_MODE (x);
9143  if (TARGET_THUMB1)
9144    {
9145      *total = thumb1_size_rtx_costs (x, code, outer_code);
9146      return true;
9147    }
9148
9149  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
9150  switch (code)
9151    {
9152    case MEM:
9153      /* A memory access costs one insn if the mode is small or the address
9154	 is a single register; otherwise it costs one insn per word.  */
9155      if (REG_P (XEXP (x, 0)))
9156	*total = COSTS_N_INSNS (1);
9157      else if (flag_pic
9158	       && GET_CODE (XEXP (x, 0)) == PLUS
9159	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9160	/* This will be split into two instructions.
9161	   See arm.md:calculate_pic_address.  */
9162	*total = COSTS_N_INSNS (2);
9163      else
9164	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9165      return true;
9166
9167    case DIV:
9168    case MOD:
9169    case UDIV:
9170    case UMOD:
9171      /* Needs a libcall, so it costs about this.  */
9172      *total = COSTS_N_INSNS (2);
9173      return false;
9174
9175    case ROTATE:
9176      if (mode == SImode && REG_P (XEXP (x, 1)))
9177	{
9178	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9179	  return true;
9180	}
9181      /* Fall through */
9182    case ROTATERT:
9183    case ASHIFT:
9184    case LSHIFTRT:
9185    case ASHIFTRT:
9186      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9187	{
9188	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9189	  return true;
9190	}
9191      else if (mode == SImode)
9192	{
9193	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9194	  /* Slightly disparage register shifts, but not by much.  */
9195	  if (!CONST_INT_P (XEXP (x, 1)))
9196	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9197	  return true;
9198	}
9199
9200      /* Needs a libcall.  */
9201      *total = COSTS_N_INSNS (2);
9202      return false;
9203
9204    case MINUS:
9205      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9206	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9207	{
9208	  *total = COSTS_N_INSNS (1);
9209	  return false;
9210	}
9211
9212      if (mode == SImode)
9213	{
9214	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9215	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9216
9217	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9218	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9219	      || subcode1 == ROTATE || subcode1 == ROTATERT
9220	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9221	      || subcode1 == ASHIFTRT)
9222	    {
9223	      /* It's just the cost of the two operands.  */
9224	      *total = 0;
9225	      return false;
9226	    }
9227
9228	  *total = COSTS_N_INSNS (1);
9229	  return false;
9230	}
9231
9232      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9233      return false;
9234
9235    case PLUS:
9236      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9237	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9238	{
9239	  *total = COSTS_N_INSNS (1);
9240	  return false;
9241	}
9242
9243      /* A shift as a part of ADD costs nothing.  */
9244      if (GET_CODE (XEXP (x, 0)) == MULT
9245	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9246	{
9247	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9248	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9249	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
9250	  return true;
9251	}
9252
9253      /* Fall through */
9254    case AND: case XOR: case IOR:
9255      if (mode == SImode)
9256	{
9257	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9258
9259	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9260	      || subcode == LSHIFTRT || subcode == ASHIFTRT
9261	      || (code == AND && subcode == NOT))
9262	    {
9263	      /* It's just the cost of the two operands.  */
9264	      *total = 0;
9265	      return false;
9266	    }
9267	}
9268
9269      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9270      return false;
9271
9272    case MULT:
9273      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9274      return false;
9275
9276    case NEG:
9277      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9278	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9279	{
9280	  *total = COSTS_N_INSNS (1);
9281	  return false;
9282	}
9283
9284      /* Fall through */
9285    case NOT:
9286      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9287
9288      return false;
9289
9290    case IF_THEN_ELSE:
9291      *total = 0;
9292      return false;
9293
9294    case COMPARE:
9295      if (cc_register (XEXP (x, 0), VOIDmode))
9296	*total = 0;
9297      else
9298	*total = COSTS_N_INSNS (1);
9299      return false;
9300
9301    case ABS:
9302      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9303	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9304	*total = COSTS_N_INSNS (1);
9305      else
9306	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9307      return false;
9308
9309    case SIGN_EXTEND:
9310    case ZERO_EXTEND:
9311      return arm_rtx_costs_1 (x, outer_code, total, 0);
9312
9313    case CONST_INT:
9314      if (const_ok_for_arm (INTVAL (x)))
9315	/* A multiplication by a constant requires another instruction
9316	   to load the constant to a register.  */
9317	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9318				? 1 : 0);
9319      else if (const_ok_for_arm (~INTVAL (x)))
9320	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9321      else if (const_ok_for_arm (-INTVAL (x)))
9322	{
9323	  if (outer_code == COMPARE || outer_code == PLUS
9324	      || outer_code == MINUS)
9325	    *total = 0;
9326	  else
9327	    *total = COSTS_N_INSNS (1);
9328	}
9329      else
9330	*total = COSTS_N_INSNS (2);
9331      return true;
9332
9333    case CONST:
9334    case LABEL_REF:
9335    case SYMBOL_REF:
9336      *total = COSTS_N_INSNS (2);
9337      return true;
9338
9339    case CONST_DOUBLE:
9340      *total = COSTS_N_INSNS (4);
9341      return true;
9342
9343    case CONST_VECTOR:
9344      if (TARGET_NEON
9345	  && TARGET_HARD_FLOAT
9346	  && outer_code == SET
9347	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9348	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9349	*total = COSTS_N_INSNS (1);
9350      else
9351	*total = COSTS_N_INSNS (4);
9352      return true;
9353
9354    case HIGH:
9355    case LO_SUM:
9356      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9357	 cost of these slightly.  */
9358      *total = COSTS_N_INSNS (1) + 1;
9359      return true;
9360
9361    case SET:
9362      return false;
9363
9364    default:
9365      if (mode != VOIDmode)
9366	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9367      else
9368	*total = COSTS_N_INSNS (4); /* Who knows?  */
9369      return false;
9370    }
9371}
9372
9373/* Helper function for arm_rtx_costs.  If the operand is a valid shift
9374   operand, then return the operand that is being shifted.  If the shift
9375   is not by a constant, then set SHIFT_REG to point to the operand.
9376   Return NULL if OP is not a shifter operand.  */
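/* For instance, (mult (reg X) (const_int 4)) is really a left shift by two,
   so (reg X) is returned and *SHIFT_REG is left untouched.  */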
9377static rtx
9378shifter_op_p (rtx op, rtx *shift_reg)
9379{
9380  enum rtx_code code = GET_CODE (op);
9381
9382  if (code == MULT && CONST_INT_P (XEXP (op, 1))
9383      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9384    return XEXP (op, 0);
9385  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9386    return XEXP (op, 0);
9387  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9388	   || code == ASHIFTRT)
9389    {
9390      if (!CONST_INT_P (XEXP (op, 1)))
9391	*shift_reg = XEXP (op, 1);
9392      return XEXP (op, 0);
9393    }
9394
9395  return NULL;
9396}
9397
9398static bool
9399arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9400{
9401  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9402  gcc_assert (GET_CODE (x) == UNSPEC);
9403
9404  switch (XINT (x, 1))
9405    {
9406    case UNSPEC_UNALIGNED_LOAD:
9407      /* We can only do unaligned loads into the integer unit, and we can't
9408	 use LDM or LDRD.  */
9409      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9410      if (speed_p)
9411	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9412		  + extra_cost->ldst.load_unaligned);
9413
9414#ifdef NOT_YET
9415      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9416				 ADDR_SPACE_GENERIC, speed_p);
9417#endif
9418      return true;
9419
9420    case UNSPEC_UNALIGNED_STORE:
9421      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9422      if (speed_p)
9423	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9424		  + extra_cost->ldst.store_unaligned);
9425
9426      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9427#ifdef NOT_YET
9428      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9429				 ADDR_SPACE_GENERIC, speed_p);
9430#endif
9431      return true;
9432
9433    case UNSPEC_VRINTZ:
9434    case UNSPEC_VRINTP:
9435    case UNSPEC_VRINTM:
9436    case UNSPEC_VRINTR:
9437    case UNSPEC_VRINTX:
9438    case UNSPEC_VRINTA:
9439      *cost = COSTS_N_INSNS (1);
9440      if (speed_p)
9441        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9442
9443      return true;
9444    default:
9445      *cost = COSTS_N_INSNS (2);
9446      break;
9447    }
9448  return false;
9449}
9450
9451/* Cost of a libcall.  We assume one insn per argument, an amount for the
9452   call (one insn for -Os) and then one for processing the result.  */
9453#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
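/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) when optimizing for size.  */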
9454
9455#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9456	do								\
9457	  {								\
9458	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9459	    if (shift_op != NULL					\
9460	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9461	      {								\
9462	        if (shift_reg)						\
9463		  {							\
9464		    if (speed_p)					\
9465		      *cost += extra_cost->alu.arith_shift_reg;	\
9466		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
9467		  }							\
9468	        else if (speed_p)					\
9469		  *cost += extra_cost->alu.arith_shift;		\
9470									\
9471		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
9472			  + rtx_cost (XEXP (x, 1 - IDX),		\
9473			              OP, 1, speed_p));		\
9474	        return true;						\
9475	      }								\
9476	  }								\
9477	while (0);
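/* Note that HANDLE_NARROW_SHIFT_ARITH has to be a macro rather than a helper
   function: the "return true" inside it is intended to return directly from
   its user, arm_new_rtx_costs, below.  */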
9478
9479/* RTX costs.  Make an estimate of the cost of executing the operation
9480   X, which is contained within an operation with code OUTER_CODE.
9481   SPEED_P indicates whether the cost desired is the performance cost,
9482   or the size cost.  The estimate is stored in COST and the return
9483   value is TRUE if the cost calculation is final, or FALSE if the
9484   caller should recurse through the operands of X to add additional
9485   costs.
9486
9487   We currently make no attempt to model the size savings of Thumb-2
9488   16-bit instructions.  At the normal points in compilation where
9489   this code is called we have no measure of whether the condition
9490   flags are live or not, and thus no realistic way to determine what
9491   the size will eventually be.  */
9492static bool
9493arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9494		   const struct cpu_cost_table *extra_cost,
9495		   int *cost, bool speed_p)
9496{
9497  machine_mode mode = GET_MODE (x);
9498
9499  if (TARGET_THUMB1)
9500    {
9501      if (speed_p)
9502	*cost = thumb1_rtx_costs (x, code, outer_code);
9503      else
9504	*cost = thumb1_size_rtx_costs (x, code, outer_code);
9505      return true;
9506    }
9507
9508  switch (code)
9509    {
9510    case SET:
9511      *cost = 0;
9512      /* SET RTXs don't have a mode so we get it from the destination.  */
9513      mode = GET_MODE (SET_DEST (x));
9514
9515      if (REG_P (SET_SRC (x))
9516	  && REG_P (SET_DEST (x)))
9517	{
9518	  /* Assume that most copies can be done with a single insn,
9519	     unless we don't have HW FP, in which case everything
9520	     larger than word mode will require two insns.  */
9521	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9522				   && GET_MODE_SIZE (mode) > 4)
9523				  || mode == DImode)
9524				 ? 2 : 1);
9525	  /* Conditional register moves can be encoded
9526	     in 16 bits in Thumb mode.  */
9527	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9528	    *cost >>= 1;
9529
9530	  return true;
9531	}
9532
9533      if (CONST_INT_P (SET_SRC (x)))
9534	{
9535	  /* Handle CONST_INT here, since the value doesn't have a mode
9536	     and we would otherwise be unable to work out the true cost.  */
9537	  *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9538	  outer_code = SET;
9539	  /* Slightly lower the cost of setting a core reg to a constant.
9540	     This helps break up chains and allows for better scheduling.  */
9541	  if (REG_P (SET_DEST (x))
9542	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
9543	    *cost -= 1;
9544	  x = SET_SRC (x);
9545	  /* Immediate moves with an immediate in the range [0, 255] can be
9546	     encoded in 16 bits in Thumb mode.  */
9547	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9548	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9549	    *cost >>= 1;
9550	  goto const_int_cost;
9551	}
9552
9553      return false;
9554
9555    case MEM:
9556      /* A memory access costs one insn if the mode is small or the address
9557	 is a single register; otherwise it costs one insn per word.  */
9558      if (REG_P (XEXP (x, 0)))
9559	*cost = COSTS_N_INSNS (1);
9560      else if (flag_pic
9561	       && GET_CODE (XEXP (x, 0)) == PLUS
9562	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9563	/* This will be split into two instructions.
9564	   See arm.md:calculate_pic_address.  */
9565	*cost = COSTS_N_INSNS (2);
9566      else
9567	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9568
9569      /* For speed optimizations, add the costs of the address and
9570	 accessing memory.  */
9571      if (speed_p)
9572#ifdef NOT_YET
9573	*cost += (extra_cost->ldst.load
9574		  + arm_address_cost (XEXP (x, 0), mode,
9575				      ADDR_SPACE_GENERIC, speed_p));
9576#else
9577        *cost += extra_cost->ldst.load;
9578#endif
9579      return true;
9580
9581    case PARALLEL:
9582    {
9583      /* Calculations of LDM costs are complex.  We assume an initial cost
9584	 (ldm_1st) that loads up to ldm_regs_per_insn_1st registers; each
9585	 additional block of ldm_regs_per_insn_subsequent registers then
9586	 costs one more insn.  The formula for N regs is thus:
9587
9588	   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9589				     + ldm_regs_per_insn_subsequent - 1)
9590				    / ldm_regs_per_insn_subsequent).
9591
9592	 Additional costs may also be added for addressing.  A similar
9593	 formula is used for STM, with stm_regs_per_insn_1st and
9594	 stm_regs_per_insn_subsequent.  */
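      /* For example, if ldm_regs_per_insn_1st were 1 and
	 ldm_regs_per_insn_subsequent were 2, an 8-register LDM would cost
	 ldm_1st + COSTS_N_INSNS ((7 + 1) / 2), i.e. ldm_1st plus 4 insns.  */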
9595
9596      bool is_ldm = load_multiple_operation (x, SImode);
9597      bool is_stm = store_multiple_operation (x, SImode);
9598
9599      *cost = COSTS_N_INSNS (1);
9600
9601      if (is_ldm || is_stm)
9602        {
9603	  if (speed_p)
9604	    {
9605	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
9606	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
9607	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
9608	                              : extra_cost->ldst.stm_regs_per_insn_1st;
9609	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
9610	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9611	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
9612
9613	      *cost += regs_per_insn_1st
9614	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9615					    + regs_per_insn_sub - 1)
9616					  / regs_per_insn_sub);
9617	      return true;
9618	    }
9619
9620        }
9621      return false;
9622    }
9623    case DIV:
9624    case UDIV:
9625      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9626	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9627	*cost = COSTS_N_INSNS (speed_p
9628			       ? extra_cost->fp[mode != SFmode].div : 1);
9629      else if (mode == SImode && TARGET_IDIV)
9630	*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9631      else
9632	*cost = LIBCALL_COST (2);
9633      return false;	/* All arguments must be in registers.  */
9634
9635    case MOD:
9636    case UMOD:
9637      *cost = LIBCALL_COST (2);
9638      return false;	/* All arguments must be in registers.  */
9639
9640    case ROTATE:
9641      if (mode == SImode && REG_P (XEXP (x, 1)))
9642	{
9643	  *cost = (COSTS_N_INSNS (2)
9644		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9645	  if (speed_p)
9646	    *cost += extra_cost->alu.shift_reg;
9647	  return true;
9648	}
9649      /* Fall through */
9650    case ROTATERT:
9651    case ASHIFT:
9652    case LSHIFTRT:
9653    case ASHIFTRT:
9654      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9655	{
9656	  *cost = (COSTS_N_INSNS (3)
9657		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9658	  if (speed_p)
9659	    *cost += 2 * extra_cost->alu.shift;
9660	  return true;
9661	}
9662      else if (mode == SImode)
9663	{
9664	  *cost = (COSTS_N_INSNS (1)
9665		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9666	  /* Slightly disparage register shifts at -Os, but not by much.  */
9667	  if (!CONST_INT_P (XEXP (x, 1)))
9668	    *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9669		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9670	  return true;
9671	}
9672      else if (GET_MODE_CLASS (mode) == MODE_INT
9673	       && GET_MODE_SIZE (mode) < 4)
9674	{
9675	  if (code == ASHIFT)
9676	    {
9677	      *cost = (COSTS_N_INSNS (1)
9678		       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9679	      /* Slightly disparage register shifts at -Os, but not by
9680	         much.  */
9681	      if (!CONST_INT_P (XEXP (x, 1)))
9682		*cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9683			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9684	    }
9685	  else if (code == LSHIFTRT || code == ASHIFTRT)
9686	    {
9687	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9688		{
9689		  /* Can use SBFX/UBFX.  */
9690		  *cost = COSTS_N_INSNS (1);
9691		  if (speed_p)
9692		    *cost += extra_cost->alu.bfx;
9693		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9694		}
9695	      else
9696		{
9697		  *cost = COSTS_N_INSNS (2);
9698		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9699		  if (speed_p)
9700		    {
9701		      if (CONST_INT_P (XEXP (x, 1)))
9702			*cost += 2 * extra_cost->alu.shift;
9703		      else
9704			*cost += (extra_cost->alu.shift
9705				  + extra_cost->alu.shift_reg);
9706		    }
9707		  else
9708		    /* Slightly disparage register shifts.  */
9709		    *cost += !CONST_INT_P (XEXP (x, 1));
9710		}
9711	    }
9712	  else /* Rotates.  */
9713	    {
9714	      *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9715	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9716	      if (speed_p)
9717		{
9718		  if (CONST_INT_P (XEXP (x, 1)))
9719		    *cost += (2 * extra_cost->alu.shift
9720			      + extra_cost->alu.log_shift);
9721		  else
9722		    *cost += (extra_cost->alu.shift
9723			      + extra_cost->alu.shift_reg
9724			      + extra_cost->alu.log_shift_reg);
9725		}
9726	    }
9727	  return true;
9728	}
9729
9730      *cost = LIBCALL_COST (2);
9731      return false;
9732
9733    case BSWAP:
9734      if (arm_arch6)
9735        {
9736          if (mode == SImode)
9737            {
9738              *cost = COSTS_N_INSNS (1);
9739              if (speed_p)
9740                *cost += extra_cost->alu.rev;
9741
9742              return false;
9743            }
9744        }
9745      else
9746        {
9747        /* No rev instruction available.  Look at arm_legacy_rev
9748           and thumb_legacy_rev for the form of RTL used then.  */
9749          if (TARGET_THUMB)
9750            {
9751              *cost = COSTS_N_INSNS (10);
9752
9753              if (speed_p)
9754                {
9755                  *cost += 6 * extra_cost->alu.shift;
9756                  *cost += 3 * extra_cost->alu.logical;
9757                }
9758            }
9759          else
9760            {
9761              *cost = COSTS_N_INSNS (5);
9762
9763              if (speed_p)
9764                {
9765                  *cost += 2 * extra_cost->alu.shift;
9766                  *cost += extra_cost->alu.arith_shift;
9767                  *cost += 2 * extra_cost->alu.logical;
9768                }
9769            }
9770          return true;
9771        }
9772      return false;
9773
9774    case MINUS:
9775      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9776	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9777	{
9778	  *cost = COSTS_N_INSNS (1);
9779	  if (GET_CODE (XEXP (x, 0)) == MULT
9780	      || GET_CODE (XEXP (x, 1)) == MULT)
9781	    {
9782	      rtx mul_op0, mul_op1, sub_op;
9783
9784	      if (speed_p)
9785		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9786
9787	      if (GET_CODE (XEXP (x, 0)) == MULT)
9788		{
9789		  mul_op0 = XEXP (XEXP (x, 0), 0);
9790		  mul_op1 = XEXP (XEXP (x, 0), 1);
9791		  sub_op = XEXP (x, 1);
9792		}
9793	      else
9794		{
9795		  mul_op0 = XEXP (XEXP (x, 1), 0);
9796		  mul_op1 = XEXP (XEXP (x, 1), 1);
9797		  sub_op = XEXP (x, 0);
9798		}
9799
9800	      /* The first operand of the multiply may be optionally
9801		 negated.  */
9802	      if (GET_CODE (mul_op0) == NEG)
9803		mul_op0 = XEXP (mul_op0, 0);
9804
9805	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9806			+ rtx_cost (mul_op1, code, 0, speed_p)
9807			+ rtx_cost (sub_op, code, 0, speed_p));
9808
9809	      return true;
9810	    }
9811
9812	  if (speed_p)
9813	    *cost += extra_cost->fp[mode != SFmode].addsub;
9814	  return false;
9815	}
9816
9817      if (mode == SImode)
9818	{
9819	  rtx shift_by_reg = NULL;
9820	  rtx shift_op;
9821	  rtx non_shift_op;
9822
9823	  *cost = COSTS_N_INSNS (1);
9824
9825	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9826	  if (shift_op == NULL)
9827	    {
9828	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9829	      non_shift_op = XEXP (x, 0);
9830	    }
9831	  else
9832	    non_shift_op = XEXP (x, 1);
9833
9834	  if (shift_op != NULL)
9835	    {
9836	      if (shift_by_reg != NULL)
9837		{
9838		  if (speed_p)
9839		    *cost += extra_cost->alu.arith_shift_reg;
9840		  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9841		}
9842	      else if (speed_p)
9843		*cost += extra_cost->alu.arith_shift;
9844
9845	      *cost += (rtx_cost (shift_op, code, 0, speed_p)
9846			+ rtx_cost (non_shift_op, code, 0, speed_p));
9847	      return true;
9848	    }
9849
9850	  if (arm_arch_thumb2
9851	      && GET_CODE (XEXP (x, 1)) == MULT)
9852	    {
9853	      /* MLS.  */
9854	      if (speed_p)
9855		*cost += extra_cost->mult[0].add;
9856	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9857			+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9858			+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9859	      return true;
9860	    }
9861
9862	  if (CONST_INT_P (XEXP (x, 0)))
9863	    {
9864	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9865					    INTVAL (XEXP (x, 0)), NULL_RTX,
9866					    NULL_RTX, 1, 0);
9867	      *cost = COSTS_N_INSNS (insns);
9868	      if (speed_p)
9869		*cost += insns * extra_cost->alu.arith;
9870	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9871	      return true;
9872	    }
9873	  else if (speed_p)
9874	    *cost += extra_cost->alu.arith;
9875
9876	  return false;
9877	}
9878
9879      if (GET_MODE_CLASS (mode) == MODE_INT
9880	  && GET_MODE_SIZE (mode) < 4)
9881	{
9882	  rtx shift_op, shift_reg;
9883	  shift_reg = NULL;
9884
9885	  /* We check both sides of the MINUS for shifter operands since,
9886	     unlike PLUS, it's not commutative.  */
9887
9888	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9889	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9890
9891	  /* Slightly disparage, as we might need to widen the result.  */
9892	  *cost = 1 + COSTS_N_INSNS (1);
9893	  if (speed_p)
9894	    *cost += extra_cost->alu.arith;
9895
9896	  if (CONST_INT_P (XEXP (x, 0)))
9897	    {
9898	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9899	      return true;
9900	    }
9901
9902	  return false;
9903	}
9904
9905      if (mode == DImode)
9906	{
9907	  *cost = COSTS_N_INSNS (2);
9908
9909	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9910	    {
9911	      rtx op1 = XEXP (x, 1);
9912
9913	      if (speed_p)
9914		*cost += 2 * extra_cost->alu.arith;
9915
9916	      if (GET_CODE (op1) == ZERO_EXTEND)
9917		*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9918	      else
9919		*cost += rtx_cost (op1, MINUS, 1, speed_p);
9920	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9921				 0, speed_p);
9922	      return true;
9923	    }
9924	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9925	    {
9926	      if (speed_p)
9927		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9928	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9929				  0, speed_p)
9930			+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9931	      return true;
9932	    }
9933	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9934		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9935	    {
9936	      if (speed_p)
9937		*cost += (extra_cost->alu.arith
9938			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9939			     ? extra_cost->alu.arith
9940			     : extra_cost->alu.arith_shift));
9941	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9942			+ rtx_cost (XEXP (XEXP (x, 1), 0),
9943				    GET_CODE (XEXP (x, 1)), 0, speed_p));
9944	      return true;
9945	    }
9946
9947	  if (speed_p)
9948	    *cost += 2 * extra_cost->alu.arith;
9949	  return false;
9950	}
9951
9952      /* Vector mode?  */
9953
9954      *cost = LIBCALL_COST (2);
9955      return false;
9956
9957    case PLUS:
9958      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9959	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9960	{
9961	  *cost = COSTS_N_INSNS (1);
9962	  if (GET_CODE (XEXP (x, 0)) == MULT)
9963	    {
9964	      rtx mul_op0, mul_op1, add_op;
9965
9966	      if (speed_p)
9967		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9968
9969	      mul_op0 = XEXP (XEXP (x, 0), 0);
9970	      mul_op1 = XEXP (XEXP (x, 0), 1);
9971	      add_op = XEXP (x, 1);
9972
9973	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9974			+ rtx_cost (mul_op1, code, 0, speed_p)
9975			+ rtx_cost (add_op, code, 0, speed_p));
9976
9977	      return true;
9978	    }
9979
9980	  if (speed_p)
9981	    *cost += extra_cost->fp[mode != SFmode].addsub;
9982	  return false;
9983	}
9984      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9985	{
9986	  *cost = LIBCALL_COST (2);
9987	  return false;
9988	}
9989
9990	/* Narrow modes can be synthesized in SImode, but the range
9991	   of useful sub-operations is limited.  Check for shift operations
9992	   on one of the operands.  Only left shifts can be used in the
9993	   narrow modes.  */
9994      if (GET_MODE_CLASS (mode) == MODE_INT
9995	  && GET_MODE_SIZE (mode) < 4)
9996	{
9997	  rtx shift_op, shift_reg;
9998	  shift_reg = NULL;
9999
10000	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
10001
10002	  if (CONST_INT_P (XEXP (x, 1)))
10003	    {
10004	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10005					    INTVAL (XEXP (x, 1)), NULL_RTX,
10006					    NULL_RTX, 1, 0);
10007	      *cost = COSTS_N_INSNS (insns);
10008	      if (speed_p)
10009		*cost += insns * extra_cost->alu.arith;
10010	      /* Slightly penalize a narrow operation as the result may
10011		 need widening.  */
10012	      *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10013	      return true;
10014	    }
10015
10016	  /* Slightly penalize a narrow operation as the result may
10017	     need widening.  */
10018	  *cost = 1 + COSTS_N_INSNS (1);
10019	  if (speed_p)
10020	    *cost += extra_cost->alu.arith;
10021
10022	  return false;
10023	}
10024
10025      if (mode == SImode)
10026	{
10027	  rtx shift_op, shift_reg;
10028
10029	  *cost = COSTS_N_INSNS (1);
10030	  if (TARGET_INT_SIMD
10031	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10032		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10033	    {
10034	      /* UXTA[BH] or SXTA[BH].  */
10035	      if (speed_p)
10036		*cost += extra_cost->alu.extend_arith;
10037	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10038				  speed_p)
10039			+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
10040	      return true;
10041	    }
10042
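	  /* An ADD with one operand given as a shifter operand, for example
	     ADD Rd, Rn, Rm, LSL #2, or the register-controlled form
	     ADD Rd, Rn, Rm, LSL Rs which is costed extra below.  */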
10043	  shift_reg = NULL;
10044	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10045	  if (shift_op != NULL)
10046	    {
10047	      if (shift_reg)
10048		{
10049		  if (speed_p)
10050		    *cost += extra_cost->alu.arith_shift_reg;
10051		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10052		}
10053	      else if (speed_p)
10054		*cost += extra_cost->alu.arith_shift;
10055
10056	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10057			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10058	      return true;
10059	    }
10060	  if (GET_CODE (XEXP (x, 0)) == MULT)
10061	    {
10062	      rtx mul_op = XEXP (x, 0);
10063
10064	      *cost = COSTS_N_INSNS (1);
10065
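	      /* Look for (mult (sign_extend X) (sign_extend Y)) where either
		 multiplicand may instead be (ashiftrt Z (const_int 16))
		 selecting a top half-word; these shapes correspond to the
		 SMLA<x><y> half-word multiply-accumulate forms.  */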
10066	      if (TARGET_DSP_MULTIPLY
10067		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10068		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10069			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10070			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10071			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10072		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10073			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10074			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10075			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10076			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10077				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10078				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10079				      == 16))))))
10080		{
10081		  /* SMLA[BT][BT].  */
10082		  if (speed_p)
10083		    *cost += extra_cost->mult[0].extend_add;
10084		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10085				      SIGN_EXTEND, 0, speed_p)
10086			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10087					SIGN_EXTEND, 0, speed_p)
10088			    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10089		  return true;
10090		}
10091
10092	      if (speed_p)
10093		*cost += extra_cost->mult[0].add;
10094	      *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10095			+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10096			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10097	      return true;
10098	    }
10099	  if (CONST_INT_P (XEXP (x, 1)))
10100	    {
10101	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10102					    INTVAL (XEXP (x, 1)), NULL_RTX,
10103					    NULL_RTX, 1, 0);
10104	      *cost = COSTS_N_INSNS (insns);
10105	      if (speed_p)
10106		*cost += insns * extra_cost->alu.arith;
10107	      *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10108	      return true;
10109	    }
10110	  else if (speed_p)
10111	    *cost += extra_cost->alu.arith;
10112
10113	  return false;
10114	}
10115
10116      if (mode == DImode)
10117	{
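	  /* (plus (mult (zero_extend A) (zero_extend B)) C) in DImode is a
	     widening multiply-accumulate, i.e. UMLAL; the sign_extend form
	     maps onto SMLAL.  */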
10118	  if (arm_arch3m
10119	      && GET_CODE (XEXP (x, 0)) == MULT
10120	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10121		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10122		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10123		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10124	    {
10125	      *cost = COSTS_N_INSNS (1);
10126	      if (speed_p)
10127		*cost += extra_cost->mult[1].extend_add;
10128	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10129				  ZERO_EXTEND, 0, speed_p)
10130			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10131				    ZERO_EXTEND, 0, speed_p)
10132			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10133	      return true;
10134	    }
10135
10136	  *cost = COSTS_N_INSNS (2);
10137
10138	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10139	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10140	    {
10141	      if (speed_p)
10142		*cost += (extra_cost->alu.arith
10143			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10144			     ? extra_cost->alu.arith
10145			     : extra_cost->alu.arith_shift));
10146
10147	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10148				  speed_p)
10149			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10150	      return true;
10151	    }
10152
10153	  if (speed_p)
10154	    *cost += 2 * extra_cost->alu.arith;
10155	  return false;
10156	}
10157
10158      /* Vector mode?  */
10159      *cost = LIBCALL_COST (2);
10160      return false;
10161    case IOR:
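      /* A REV16 idiom: roughly, an IOR of a left-shift-by-8 and a
	 right-shift-by-8 of the same value, each masked to alternate
	 bytes; aarch_rev16_p checks the precise shape.  */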
10162      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10163        {
10164          *cost = COSTS_N_INSNS (1);
10165          if (speed_p)
10166            *cost += extra_cost->alu.rev;
10167
10168          return true;
10169        }
10170    /* Fall through.  */
10171    case AND: case XOR:
10172      if (mode == SImode)
10173	{
10174	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10175	  rtx op0 = XEXP (x, 0);
10176	  rtx shift_op, shift_reg;
10177
10178	  *cost = COSTS_N_INSNS (1);
10179
10180	  if (subcode == NOT
10181	      && (code == AND
10182		  || (code == IOR && TARGET_THUMB2)))
10183	    op0 = XEXP (op0, 0);
10184
10185	  shift_reg = NULL;
10186	  shift_op = shifter_op_p (op0, &shift_reg);
10187	  if (shift_op != NULL)
10188	    {
10189	      if (shift_reg)
10190		{
10191		  if (speed_p)
10192		    *cost += extra_cost->alu.log_shift_reg;
10193		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10194		}
10195	      else if (speed_p)
10196		*cost += extra_cost->alu.log_shift;
10197
10198	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10199			+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
10200	      return true;
10201	    }
10202
10203	  if (CONST_INT_P (XEXP (x, 1)))
10204	    {
10205	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10206					    INTVAL (XEXP (x, 1)), NULL_RTX,
10207					    NULL_RTX, 1, 0);
10208
10209	      *cost = COSTS_N_INSNS (insns);
10210	      if (speed_p)
10211		*cost += insns * extra_cost->alu.logical;
10212	      *cost += rtx_cost (op0, code, 0, speed_p);
10213	      return true;
10214	    }
10215
10216	  if (speed_p)
10217	    *cost += extra_cost->alu.logical;
10218	  *cost += (rtx_cost (op0, code, 0, speed_p)
10219		    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10220	  return true;
10221	}
10222
10223      if (mode == DImode)
10224	{
10225	  rtx op0 = XEXP (x, 0);
10226	  enum rtx_code subcode = GET_CODE (op0);
10227
10228	  *cost = COSTS_N_INSNS (2);
10229
10230	  if (subcode == NOT
10231	      && (code == AND
10232		  || (code == IOR && TARGET_THUMB2)))
10233	    op0 = XEXP (op0, 0);
10234
10235	  if (GET_CODE (op0) == ZERO_EXTEND)
10236	    {
10237	      if (speed_p)
10238		*cost += 2 * extra_cost->alu.logical;
10239
10240	      *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10241			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
10242	      return true;
10243	    }
10244	  else if (GET_CODE (op0) == SIGN_EXTEND)
10245	    {
10246	      if (speed_p)
10247		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10248
10249	      *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10250			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
10251	      return true;
10252	    }
10253
10254	  if (speed_p)
10255	    *cost += 2 * extra_cost->alu.logical;
10256
10257	  return true;
10258	}
10259      /* Vector mode?  */
10260
10261      *cost = LIBCALL_COST (2);
10262      return false;
10263
10264    case MULT:
10265      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10266	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10267	{
10268	  rtx op0 = XEXP (x, 0);
10269
10270	  *cost = COSTS_N_INSNS (1);
10271
10272	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10273	    op0 = XEXP (op0, 0);
10274
10275	  if (speed_p)
10276	    *cost += extra_cost->fp[mode != SFmode].mult;
10277
10278	  *cost += (rtx_cost (op0, MULT, 0, speed_p)
10279		    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10280	  return true;
10281	}
10282      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10283	{
10284	  *cost = LIBCALL_COST (2);
10285	  return false;
10286	}
10287
10288      if (mode == SImode)
10289	{
10290	  *cost = COSTS_N_INSNS (1);
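	  /* As in the PLUS case: (mult (sign_extend X) (sign_extend Y)),
	     where either operand may instead be (ashiftrt Z 16) picking a
	     top half-word; this is the SMUL<x><y> half-word multiply.  */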
10291	  if (TARGET_DSP_MULTIPLY
10292	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10293		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10294		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10295			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10296			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10297		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10298		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10299		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10300		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10301			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10302			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10303			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10304				  == 16))))))
10305	    {
10306	      /* SMUL[TB][TB].  */
10307	      if (speed_p)
10308		*cost += extra_cost->mult[0].extend;
10309	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0),
10310				 SIGN_EXTEND, 0, speed_p);
10311	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0),
10312				 SIGN_EXTEND, 1, speed_p);
10313	      return true;
10314	    }
10315	  if (speed_p)
10316	    *cost += extra_cost->mult[0].simple;
10317	  return false;
10318	}
10319
10320      if (mode == DImode)
10321	{
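	  /* A DImode (mult (zero_extend A) (zero_extend B)) is a widening
	     32x32->64 multiply, i.e. UMULL; the sign_extend form is SMULL.  */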
10322	  if (arm_arch3m
10323	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10324		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10325		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10326		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10327	    {
10328	      *cost = COSTS_N_INSNS (1);
10329	      if (speed_p)
10330		*cost += extra_cost->mult[1].extend;
10331	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10332				  ZERO_EXTEND, 0, speed_p)
10333			+ rtx_cost (XEXP (XEXP (x, 1), 0),
10334				    ZERO_EXTEND, 0, speed_p));
10335	      return true;
10336	    }
10337
10338	  *cost = LIBCALL_COST (2);
10339	  return false;
10340	}
10341
10342      /* Vector mode?  */
10343      *cost = LIBCALL_COST (2);
10344      return false;
10345
10346    case NEG:
10347      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10348	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10349	{
10350	  if (GET_CODE (XEXP (x, 0)) == MULT)
10351	    {
10352	      /* VNMUL.  */
10353	      *cost = rtx_cost (XEXP (x, 0), NEG, 0, speed_p);
10354	      return true;
10355	    }
10356
10357	  *cost = COSTS_N_INSNS (1);
10358	  if (speed_p)
10359	    *cost += extra_cost->fp[mode != SFmode].neg;
10360
10361	  return false;
10362	}
10363      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10364	{
10365	  *cost = LIBCALL_COST (1);
10366	  return false;
10367	}
10368
10369      if (mode == SImode)
10370	{
10371	  if (GET_CODE (XEXP (x, 0)) == ABS)
10372	    {
10373	      *cost = COSTS_N_INSNS (2);
10374	      /* Assume the non-flag-changing variant.  */
10375	      if (speed_p)
10376		*cost += (extra_cost->alu.log_shift
10377			  + extra_cost->alu.arith_shift);
10378	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10379	      return true;
10380	    }
10381
10382	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10383	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10384	    {
10385	      *cost = COSTS_N_INSNS (2);
10386	      /* No extra cost for MOV imm and MVN imm.  */
10387	      /* If the comparison op is using the flags, there's no further
10388		 cost, otherwise we need to add the cost of the comparison.  */
10389	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
10390		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10391		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
10392		{
10393		  *cost += (COSTS_N_INSNS (1)
10394			    + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10395					speed_p)
10396			    + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10397					speed_p));
10398		  if (speed_p)
10399		    *cost += extra_cost->alu.arith;
10400		}
10401	      return true;
10402	    }
10403	  *cost = COSTS_N_INSNS (1);
10404	  if (speed_p)
10405	    *cost += extra_cost->alu.arith;
10406	  return false;
10407	}
10408
10409      if (GET_MODE_CLASS (mode) == MODE_INT
10410	  && GET_MODE_SIZE (mode) < 4)
10411	{
10412	  /* Slightly disparage, as we might need an extend operation.  */
10413	  *cost = 1 + COSTS_N_INSNS (1);
10414	  if (speed_p)
10415	    *cost += extra_cost->alu.arith;
10416	  return false;
10417	}
10418
10419      if (mode == DImode)
10420	{
10421	  *cost = COSTS_N_INSNS (2);
10422	  if (speed_p)
10423	    *cost += 2 * extra_cost->alu.arith;
10424	  return false;
10425	}
10426
10427      /* Vector mode?  */
10428      *cost = LIBCALL_COST (1);
10429      return false;
10430
10431    case NOT:
10432      if (mode == SImode)
10433	{
10434	  rtx shift_op;
10435	  rtx shift_reg = NULL;
10436
10437	  *cost = COSTS_N_INSNS (1);
10438	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10439
10440	  if (shift_op)
10441	    {
10442	      if (shift_reg != NULL)
10443		{
10444		  if (speed_p)
10445		    *cost += extra_cost->alu.log_shift_reg;
10446		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10447		}
10448	      else if (speed_p)
10449		*cost += extra_cost->alu.log_shift;
10450	      *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10451	      return true;
10452	    }
10453
10454	  if (speed_p)
10455	    *cost += extra_cost->alu.logical;
10456	  return false;
10457	}
10458      if (mode == DImode)
10459	{
10460	  *cost = COSTS_N_INSNS (2);
10461	  return false;
10462	}
10463
10464      /* Vector mode?  */
10465
10466      *cost += LIBCALL_COST (1);
10467      return false;
10468
10469    case IF_THEN_ELSE:
10470      {
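        /* If either arm is (pc) this is a conditional branch; cost it as
	   a fixed, moderately expensive operation rather than analysing
	   the condition.  */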
10471        if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10472	  {
10473	    *cost = COSTS_N_INSNS (4);
10474	    return true;
10475	  }
10476	int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10477	int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10478
10479	*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10480	/* Assume that if one arm of the if_then_else is a register,
10481	   it will be tied to the result, eliminating the conditional
10482	   insn.  */
10483	if (REG_P (XEXP (x, 1)))
10484	  *cost += op2cost;
10485	else if (REG_P (XEXP (x, 2)))
10486	  *cost += op1cost;
10487	else
10488	  {
10489	    if (speed_p)
10490	      {
10491		if (extra_cost->alu.non_exec_costs_exec)
10492		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10493		else
10494		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10495	      }
10496	    else
10497	      *cost += op1cost + op2cost;
10498	  }
10499      }
10500      return true;
10501
10502    case COMPARE:
10503      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10504	*cost = 0;
10505      else
10506	{
10507	  machine_mode op0mode;
10508	  /* We'll mostly assume that the cost of a compare is the cost of the
10509	     LHS.  However, there are some notable exceptions.  */
10510
10511	  /* Floating point compares are never done as side-effects.  */
10512	  op0mode = GET_MODE (XEXP (x, 0));
10513	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10514	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10515	    {
10516	      *cost = COSTS_N_INSNS (1);
10517	      if (speed_p)
10518		*cost += extra_cost->fp[op0mode != SFmode].compare;
10519
10520	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
10521		{
10522		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10523		  return true;
10524		}
10525
10526	      return false;
10527	    }
10528	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10529	    {
10530	      *cost = LIBCALL_COST (2);
10531	      return false;
10532	    }
10533
10534	  /* DImode compares normally take two insns.  */
10535	  if (op0mode == DImode)
10536	    {
10537	      *cost = COSTS_N_INSNS (2);
10538	      if (speed_p)
10539		*cost += 2 * extra_cost->alu.arith;
10540	      return false;
10541	    }
10542
10543	  if (op0mode == SImode)
10544	    {
10545	      rtx shift_op;
10546	      rtx shift_reg;
10547
10548	      if (XEXP (x, 1) == const0_rtx
10549		  && !(REG_P (XEXP (x, 0))
10550		       || (GET_CODE (XEXP (x, 0)) == SUBREG
10551			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
10552		{
10553		  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10554
10555		  /* Multiply operations that set the flags are often
10556		     significantly more expensive.  */
10557		  if (speed_p
10558		      && GET_CODE (XEXP (x, 0)) == MULT
10559		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10560		    *cost += extra_cost->mult[0].flag_setting;
10561
10562		  if (speed_p
10563		      && GET_CODE (XEXP (x, 0)) == PLUS
10564		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10565		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10566							    0), 1), mode))
10567		    *cost += extra_cost->mult[0].flag_setting;
10568		  return true;
10569		}
10570
10571	      shift_reg = NULL;
10572	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10573	      if (shift_op != NULL)
10574		{
10575		  *cost = COSTS_N_INSNS (1);
10576		  if (shift_reg != NULL)
10577		    {
10578		      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10579		      if (speed_p)
10580			*cost += extra_cost->alu.arith_shift_reg;
10581		    }
10582		  else if (speed_p)
10583		    *cost += extra_cost->alu.arith_shift;
10584		  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10585			    + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10586		  return true;
10587		}
10588
10589	      *cost = COSTS_N_INSNS (1);
10590	      if (speed_p)
10591		*cost += extra_cost->alu.arith;
10592	      if (CONST_INT_P (XEXP (x, 1))
10593		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10594		{
10595		  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10596		  return true;
10597		}
10598	      return false;
10599	    }
10600
10601	  /* Vector mode?  */
10602
10603	  *cost = LIBCALL_COST (2);
10604	  return false;
10605	}
10606      return true;
10607
10608    case EQ:
10609    case NE:
10610    case LT:
10611    case LE:
10612    case GT:
10613    case GE:
10614    case LTU:
10615    case LEU:
10616    case GEU:
10617    case GTU:
10618    case ORDERED:
10619    case UNORDERED:
10620    case UNEQ:
10621    case UNLE:
10622    case UNLT:
10623    case UNGE:
10624    case UNGT:
10625    case LTGT:
10626      if (outer_code == SET)
10627	{
10628	  /* Is it a store-flag operation?  */
10629	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10630	      && XEXP (x, 1) == const0_rtx)
10631	    {
10632	      /* Thumb also needs an IT insn.  */
10633	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10634	      return true;
10635	    }
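	  /* Otherwise a comparison against zero is being turned into a
	     store-flag sequence; the per-code assembly comments below give
	     representative expansions.  */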
10636	  if (XEXP (x, 1) == const0_rtx)
10637	    {
10638	      switch (code)
10639		{
10640		case LT:
10641		  /* LSR Rd, Rn, #31.  */
10642		  *cost = COSTS_N_INSNS (1);
10643		  if (speed_p)
10644		    *cost += extra_cost->alu.shift;
10645		  break;
10646
10647		case EQ:
10648		  /* RSBS T1, Rn, #0
10649		     ADC  Rd, Rn, T1.  */
10650
10651		case NE:
10652		  /* SUBS T1, Rn, #1
10653		     SBC  Rd, Rn, T1.  */
10654		  *cost = COSTS_N_INSNS (2);
10655		  break;
10656
10657		case LE:
10658		  /* RSBS T1, Rn, Rn, LSR #31
10659		     ADC  Rd, Rn, T1. */
10660		  *cost = COSTS_N_INSNS (2);
10661		  if (speed_p)
10662		    *cost += extra_cost->alu.arith_shift;
10663		  break;
10664
10665		case GT:
10666		  /* RSB  Rd, Rn, Rn, ASR #1
10667		     LSR  Rd, Rd, #31.  */
10668		  *cost = COSTS_N_INSNS (2);
10669		  if (speed_p)
10670		    *cost += (extra_cost->alu.arith_shift
10671			      + extra_cost->alu.shift);
10672		  break;
10673
10674		case GE:
10675		  /* ASR  Rd, Rn, #31
10676		     ADD  Rd, Rn, #1.  */
10677		  *cost = COSTS_N_INSNS (2);
10678		  if (speed_p)
10679		    *cost += extra_cost->alu.shift;
10680		  break;
10681
10682		default:
10683		  /* Remaining cases are either meaningless or would take
10684		     three insns anyway.  */
10685		  *cost = COSTS_N_INSNS (3);
10686		  break;
10687		}
10688	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10689	      return true;
10690	    }
10691	  else
10692	    {
10693	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10694	      if (CONST_INT_P (XEXP (x, 1))
10695		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10696		{
10697		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10698		  return true;
10699		}
10700
10701	      return false;
10702	    }
10703	}
10704      /* Not directly inside a set.  If it involves the condition code
10705	 register it must be the condition for a branch, cond_exec or
10706	 if_then_else operation.  Since the comparison is performed
10707	 elsewhere this is just the control part, which has no additional
10708	 cost.  */
10709      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10710	       && XEXP (x, 1) == const0_rtx)
10711	{
10712	  *cost = 0;
10713	  return true;
10714	}
10715      return false;
10716
10717    case ABS:
10718      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10719	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10720	{
10721	  *cost = COSTS_N_INSNS (1);
10722	  if (speed_p)
10723	    *cost += extra_cost->fp[mode != SFmode].neg;
10724
10725	  return false;
10726	}
10727      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10728	{
10729	  *cost = LIBCALL_COST (1);
10730	  return false;
10731	}
10732
10733      if (mode == SImode)
10734	{
10735	  *cost = COSTS_N_INSNS (1);
10736	  if (speed_p)
10737	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10738	  return false;
10739	}
10740      /* Vector mode?  */
10741      *cost = LIBCALL_COST (1);
10742      return false;
10743
10744    case SIGN_EXTEND:
10745      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10746	  && MEM_P (XEXP (x, 0)))
10747	{
10748	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10749
10750	  if (mode == DImode)
10751	    *cost += COSTS_N_INSNS (1);
10752
10753	  if (!speed_p)
10754	    return true;
10755
10756	  if (GET_MODE (XEXP (x, 0)) == SImode)
10757	    *cost += extra_cost->ldst.load;
10758	  else
10759	    *cost += extra_cost->ldst.load_sign_extend;
10760
10761	  if (mode == DImode)
10762	    *cost += extra_cost->alu.shift;
10763
10764	  return true;
10765	}
10766
10767      /* Widening from less than 32-bits requires an extend operation.  */
10768      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10769	{
10770	  /* We have SXTB/SXTH.  */
10771	  *cost = COSTS_N_INSNS (1);
10772	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10773	  if (speed_p)
10774	    *cost += extra_cost->alu.extend;
10775	}
10776      else if (GET_MODE (XEXP (x, 0)) != SImode)
10777	{
10778	  /* Needs two shifts.  */
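	  /* e.g. sign-extending a half-word without SXTH:
	     LSL Rd, Rn, #16 followed by ASR Rd, Rd, #16.  */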
10779	  *cost = COSTS_N_INSNS (2);
10780	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10781	  if (speed_p)
10782	    *cost += 2 * extra_cost->alu.shift;
10783	}
10784
10785      /* Widening beyond 32-bits requires one more insn.  */
10786      if (mode == DImode)
10787	{
10788	  *cost += COSTS_N_INSNS (1);
10789	  if (speed_p)
10790	    *cost += extra_cost->alu.shift;
10791	}
10792
10793      return true;
10794
10795    case ZERO_EXTEND:
10796      if ((arm_arch4
10797	   || GET_MODE (XEXP (x, 0)) == SImode
10798	   || GET_MODE (XEXP (x, 0)) == QImode)
10799	  && MEM_P (XEXP (x, 0)))
10800	{
10801	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10802
10803	  if (mode == DImode)
10804	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
10805
10806	  return true;
10807	}
10808
10809      /* Widening from less than 32-bits requires an extend operation.  */
10810      if (GET_MODE (XEXP (x, 0)) == QImode)
10811	{
10812	  /* UXTB can be a shorter instruction in Thumb2, but it might
10813	     be slower than the AND Rd, Rn, #255 alternative.  When
10814	     optimizing for speed it should never be slower to use
10815	     AND, and we don't really model 16-bit vs 32-bit insns
10816	     here.  */
10817	  *cost = COSTS_N_INSNS (1);
10818	  if (speed_p)
10819	    *cost += extra_cost->alu.logical;
10820	}
10821      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10822	{
10823	  /* We have UXTB/UXTH.  */
10824	  *cost = COSTS_N_INSNS (1);
10825	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10826	  if (speed_p)
10827	    *cost += extra_cost->alu.extend;
10828	}
10829      else if (GET_MODE (XEXP (x, 0)) != SImode)
10830	{
10831	  /* Needs two shifts.  It's marginally preferable to use
10832	     shifts rather than two BIC instructions as the second
10833	     shift may merge with a subsequent insn as a shifter
10834	     op.  */
10835	  *cost = COSTS_N_INSNS (2);
10836	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10837	  if (speed_p)
10838	    *cost += 2 * extra_cost->alu.shift;
10839	}
10840      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
10841        *cost = COSTS_N_INSNS (1);
10842
10843      /* Widening beyond 32-bits requires one more insn.  */
10844      if (mode == DImode)
10845	{
10846	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
10847	}
10848
10849      return true;
10850
10851    case CONST_INT:
10852      *cost = 0;
10853      /* CONST_INT has no mode, so we cannot tell for sure how many
10854	 insns are really going to be needed.  The best we can do is
10855	 look at the value passed.  If it fits in SImode, then assume
10856	 that's the mode it will be used for.  Otherwise assume it
10857	 will be used in DImode.  */
10858      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10859	mode = SImode;
10860      else
10861	mode = DImode;
10862
10863      /* Avoid blowing up in arm_gen_constant ().  */
10864      if (!(outer_code == PLUS
10865	    || outer_code == AND
10866	    || outer_code == IOR
10867	    || outer_code == XOR
10868	    || outer_code == MINUS))
10869	outer_code = SET;
10870
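      /* With its final 'generate' argument zero, arm_gen_constant only
	 counts the data-processing insns needed to synthesize the constant
	 in the context of OUTER_CODE, e.g. a single insn for an 8-bit
	 rotated immediate such as 0xff00.  */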
10871    const_int_cost:
10872      if (mode == SImode)
10873	{
10874	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10875						    INTVAL (x), NULL, NULL,
10876						    0, 0));
10877	  /* Extra costs?  */
10878	}
10879      else
10880	{
10881	  *cost += COSTS_N_INSNS (arm_gen_constant
10882				  (outer_code, SImode, NULL,
10883				   trunc_int_for_mode (INTVAL (x), SImode),
10884				   NULL, NULL, 0, 0)
10885				  + arm_gen_constant (outer_code, SImode, NULL,
10886						      INTVAL (x) >> 32, NULL,
10887						      NULL, 0, 0));
10888	  /* Extra costs?  */
10889	}
10890
10891      return true;
10892
10893    case CONST:
10894    case LABEL_REF:
10895    case SYMBOL_REF:
10896      if (speed_p)
10897	{
10898	  if (arm_arch_thumb2 && !flag_pic)
10899	    *cost = COSTS_N_INSNS (2);
10900	  else
10901	    *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10902	}
10903      else
10904	*cost = COSTS_N_INSNS (2);
10905
10906      if (flag_pic)
10907	{
10908	  *cost += COSTS_N_INSNS (1);
10909	  if (speed_p)
10910	    *cost += extra_cost->alu.arith;
10911	}
10912
10913      return true;
10914
10915    case CONST_FIXED:
10916      *cost = COSTS_N_INSNS (4);
10917      /* Fixme.  */
10918      return true;
10919
10920    case CONST_DOUBLE:
10921      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10922	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10923	{
10924	  if (vfp3_const_double_rtx (x))
10925	    {
10926	      *cost = COSTS_N_INSNS (1);
10927	      if (speed_p)
10928		*cost += extra_cost->fp[mode == DFmode].fpconst;
10929	      return true;
10930	    }
10931
10932	  if (speed_p)
10933	    {
10934	      *cost = COSTS_N_INSNS (1);
10935	      if (mode == DFmode)
10936		*cost += extra_cost->ldst.loadd;
10937	      else
10938		*cost += extra_cost->ldst.loadf;
10939	    }
10940	  else
10941	    *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10942
10943	  return true;
10944	}
10945      *cost = COSTS_N_INSNS (4);
10946      return true;
10947
10948    case CONST_VECTOR:
10949      /* Fixme.  */
10950      if (TARGET_NEON
10951	  && TARGET_HARD_FLOAT
10952	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10953	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10954	*cost = COSTS_N_INSNS (1);
10955      else
10956	*cost = COSTS_N_INSNS (4);
10957      return true;
10958
10959    case HIGH:
10960    case LO_SUM:
10961      *cost = COSTS_N_INSNS (1);
10962      /* When optimizing for size, we prefer constant pool entries to
10963	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
10964      if (!speed_p)
10965	*cost += 1;
10966      return true;
10967
10968    case CLZ:
10969      *cost = COSTS_N_INSNS (1);
10970      if (speed_p)
10971	*cost += extra_cost->alu.clz;
10972      return false;
10973
10974    case SMIN:
10975      if (XEXP (x, 1) == const0_rtx)
10976	{
10977	  *cost = COSTS_N_INSNS (1);
10978	  if (speed_p)
10979	    *cost += extra_cost->alu.log_shift;
10980	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10981	  return true;
10982	}
10983      /* Fall through.  */
10984    case SMAX:
10985    case UMIN:
10986    case UMAX:
10987      *cost = COSTS_N_INSNS (2);
10988      return false;
10989
10990    case TRUNCATE:
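      /* (truncate:SI (ashiftrt:DI (mult:DI (sign_extend X) (sign_extend Y))
	 (const_int 32))) is the high half of a widening 32x32->64 multiply,
	 i.e. just the high result register of an SMULL/UMULL.  */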
10991      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10992	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10993	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10994	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10995	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10996	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10997	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10998		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10999		      == ZERO_EXTEND))))
11000	{
11001	  *cost = COSTS_N_INSNS (1);
11002	  if (speed_p)
11003	    *cost += extra_cost->mult[1].extend;
11004	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
11005			      speed_p)
11006		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
11007				0, speed_p));
11008	  return true;
11009	}
11010      *cost = LIBCALL_COST (1);
11011      return false;
11012
11013    case UNSPEC:
11014      return arm_unspec_cost (x, outer_code, speed_p, cost);
11015
11016    case PC:
11017      /* Reading the PC is like reading any other register.  Writing it
11018	 is more expensive, but we take that into account elsewhere.  */
11019      *cost = 0;
11020      return true;
11021
11022    case ZERO_EXTRACT:
11023      /* TODO: Simple zero_extract of bottom bits using AND.  */
11024      /* Fall through.  */
11025    case SIGN_EXTRACT:
11026      if (arm_arch6
11027	  && mode == SImode
11028	  && CONST_INT_P (XEXP (x, 1))
11029	  && CONST_INT_P (XEXP (x, 2)))
11030	{
11031	  *cost = COSTS_N_INSNS (1);
11032	  if (speed_p)
11033	    *cost += extra_cost->alu.bfx;
11034	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11035	  return true;
11036	}
11037      /* Without UBFX/SBFX, need to resort to shift operations.  */
11038      *cost = COSTS_N_INSNS (2);
11039      if (speed_p)
11040	*cost += 2 * extra_cost->alu.shift;
11041      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
11042      return true;
11043
11044    case FLOAT_EXTEND:
11045      if (TARGET_HARD_FLOAT)
11046	{
11047	  *cost = COSTS_N_INSNS (1);
11048	  if (speed_p)
11049	    *cost += extra_cost->fp[mode == DFmode].widen;
11050	  if (!TARGET_FPU_ARMV8
11051	      && GET_MODE (XEXP (x, 0)) == HFmode)
11052	    {
11053	      /* Pre v8, widening HF->DF is a two-step process, first
11054	         widening to SFmode.  */
11055	      *cost += COSTS_N_INSNS (1);
11056	      if (speed_p)
11057		*cost += extra_cost->fp[0].widen;
11058	    }
11059	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11060	  return true;
11061	}
11062
11063      *cost = LIBCALL_COST (1);
11064      return false;
11065
11066    case FLOAT_TRUNCATE:
11067      if (TARGET_HARD_FLOAT)
11068	{
11069	  *cost = COSTS_N_INSNS (1);
11070	  if (speed_p)
11071	    *cost += extra_cost->fp[mode == DFmode].narrow;
11072	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11073	  return true;
11074	  /* Vector modes?  */
11075	}
11076      *cost = LIBCALL_COST (1);
11077      return false;
11078
11079    case FMA:
11080      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11081        {
11082          rtx op0 = XEXP (x, 0);
11083          rtx op1 = XEXP (x, 1);
11084          rtx op2 = XEXP (x, 2);
11085
11086          *cost = COSTS_N_INSNS (1);
11087
11088          /* vfms or vfnma.  */
11089          if (GET_CODE (op0) == NEG)
11090            op0 = XEXP (op0, 0);
11091
11092          /* vfnms or vfnma.  */
11093          if (GET_CODE (op2) == NEG)
11094            op2 = XEXP (op2, 0);
11095
11096          *cost += rtx_cost (op0, FMA, 0, speed_p);
11097          *cost += rtx_cost (op1, FMA, 1, speed_p);
11098          *cost += rtx_cost (op2, FMA, 2, speed_p);
11099
11100          if (speed_p)
11101            *cost += extra_cost->fp[mode == DFmode].fma;
11102
11103          return true;
11104        }
11105
11106      *cost = LIBCALL_COST (3);
11107      return false;
11108
11109    case FIX:
11110    case UNSIGNED_FIX:
11111      if (TARGET_HARD_FLOAT)
11112	{
11113	  if (GET_MODE_CLASS (mode) == MODE_INT)
11114	    {
11115	      *cost = COSTS_N_INSNS (1);
11116	      if (speed_p)
11117		*cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11118	      /* Strip off the 'cost' of rounding towards zero.  */
11119	      if (GET_CODE (XEXP (x, 0)) == FIX)
11120		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11121	      else
11122		*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11123	      /* ??? Increase the cost to deal with transferring from
11124		 FP -> CORE registers?  */
11125	      return true;
11126	    }
11127	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11128		   && TARGET_FPU_ARMV8)
11129	    {
11130	      *cost = COSTS_N_INSNS (1);
11131	      if (speed_p)
11132		*cost += extra_cost->fp[mode == DFmode].roundint;
11133	      return false;
11134	    }
11135	  /* Vector costs? */
11136	}
11137      *cost = LIBCALL_COST (1);
11138      return false;
11139
11140    case FLOAT:
11141    case UNSIGNED_FLOAT:
11142      if (TARGET_HARD_FLOAT)
11143	{
11144	  /* ??? Increase the cost to deal with transferring from CORE
11145	     -> FP registers?  */
11146	  *cost = COSTS_N_INSNS (1);
11147	  if (speed_p)
11148	    *cost += extra_cost->fp[mode == DFmode].fromint;
11149	  return false;
11150	}
11151      *cost = LIBCALL_COST (1);
11152      return false;
11153
11154    case CALL:
11155      *cost = COSTS_N_INSNS (1);
11156      return true;
11157
11158    case ASM_OPERANDS:
11159      {
11160        /* Just a guess.  Guess the number of instructions in the asm
11161           plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
11162           though (see PR60663).  */
11163        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11164        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11165
11166        *cost = COSTS_N_INSNS (asm_length + num_operands);
11167        return true;
11168      }
11169    default:
11170      if (mode != VOIDmode)
11171	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11172      else
11173	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11174      return false;
11175    }
11176}
11177
11178#undef HANDLE_NARROW_SHIFT_ARITH
11179
11180/* RTX costs: dispatch to the size or tuning-specific cost routines.  */
11181static bool
11182arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11183	       int *total, bool speed)
11184{
11185  bool result;
11186
11187  if (TARGET_OLD_RTX_COSTS
11188      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11189    {
11190      /* Old way.  (Deprecated.)  */
11191      if (!speed)
11192	result = arm_size_rtx_costs (x, (enum rtx_code) code,
11193				     (enum rtx_code) outer_code, total);
11194      else
11195	result = current_tune->rtx_costs (x,  (enum rtx_code) code,
11196					  (enum rtx_code) outer_code, total,
11197					  speed);
11198    }
11199  else
11200    {
11201      /* New way.  */
11202      if (current_tune->insn_extra_cost)
11203        result =  arm_new_rtx_costs (x, (enum rtx_code) code,
11204				     (enum rtx_code) outer_code,
11205				     current_tune->insn_extra_cost,
11206				     total, speed);
11207      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11208	 && current_tune->insn_extra_cost == NULL.  */
11209      else
11210        result =  arm_new_rtx_costs (x, (enum rtx_code) code,
11211				    (enum rtx_code) outer_code,
11212				    &generic_extra_costs, total, speed);
11213    }
11214
11215  if (dump_file && (dump_flags & TDF_DETAILS))
11216    {
11217      print_rtl_single (dump_file, x);
11218      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11219	       *total, result ? "final" : "partial");
11220    }
11221  return result;
11222}
11223
11224/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
11225   supported on any "slowmul" cores, so it can be ignored.  */
11226
11227static bool
11228arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11229		       int *total, bool speed)
11230{
11231  machine_mode mode = GET_MODE (x);
11232
11233  if (TARGET_THUMB)
11234    {
11235      *total = thumb1_rtx_costs (x, code, outer_code);
11236      return true;
11237    }
11238
11239  switch (code)
11240    {
11241    case MULT:
11242      if (GET_MODE_CLASS (mode) == MODE_FLOAT
11243	  || mode == DImode)
11244	{
11245	  *total = COSTS_N_INSNS (20);
11246	  return false;
11247	}
11248
11249      if (CONST_INT_P (XEXP (x, 1)))
11250	{
11251	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11252				      & (unsigned HOST_WIDE_INT) 0xffffffff);
11253	  int cost, const_ok = const_ok_for_arm (i);
11254	  int j, booth_unit_size;
11255
11256	  /* Tune as appropriate.  */
11257	  cost = const_ok ? 4 : 8;
11258	  booth_unit_size = 2;
11259	  for (j = 0; i && j < 32; j += booth_unit_size)
11260	    {
11261	      i >>= booth_unit_size;
11262	      cost++;
11263	    }
11264
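	  /* e.g. for 0xff (a valid immediate) this gives 4 + 4 = 8: the
	     2-bit-per-step Booth unit takes four steps to shift out the
	     eight significant bits.  */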
11265	  *total = COSTS_N_INSNS (cost);
11266	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11267	  return true;
11268	}
11269
11270      *total = COSTS_N_INSNS (20);
11271      return false;
11272
11273    default:
11274      return arm_rtx_costs_1 (x, outer_code, total, speed);
11275    }
11276}
11277
11278
11279/* RTX cost for cores with a fast multiply unit (M variants).  */
11280
11281static bool
11282arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11283		       int *total, bool speed)
11284{
11285  machine_mode mode = GET_MODE (x);
11286
11287  if (TARGET_THUMB1)
11288    {
11289      *total = thumb1_rtx_costs (x, code, outer_code);
11290      return true;
11291    }
11292
11293  /* ??? Should Thumb-2 use different costs?  */
11294  switch (code)
11295    {
11296    case MULT:
11297      /* There is no point basing this on the tuning, since it is always the
11298	 fast variant if it exists at all.  */
11299      if (mode == DImode
11300	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11301	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11302	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11303	{
11304	  *total = COSTS_N_INSNS (2);
11305	  return false;
11306	}
11307
11308
11309      if (mode == DImode)
11310	{
11311	  *total = COSTS_N_INSNS (5);
11312	  return false;
11313	}
11314
11315      if (CONST_INT_P (XEXP (x, 1)))
11316	{
11317	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11318				      & (unsigned HOST_WIDE_INT) 0xffffffff);
11319	  int cost, const_ok = const_ok_for_arm (i);
11320	  int j, booth_unit_size;
11321
11322	  /* Tune as appropriate.  */
11323	  cost = const_ok ? 4 : 8;
11324	  booth_unit_size = 8;
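	  /* With an 8-bit Booth step, e.g. 0xff now adds only one extra
	     cycle (4 + 1 = 5).  */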
11325	  for (j = 0; i && j < 32; j += booth_unit_size)
11326	    {
11327	      i >>= booth_unit_size;
11328	      cost++;
11329	    }
11330
11331	  *total = COSTS_N_INSNS (cost);
11332	  return false;
11333	}
11334
11335      if (mode == SImode)
11336	{
11337	  *total = COSTS_N_INSNS (4);
11338	  return false;
11339	}
11340
11341      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11342	{
11343	  if (TARGET_HARD_FLOAT
11344	      && (mode == SFmode
11345		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
11346	    {
11347	      *total = COSTS_N_INSNS (1);
11348	      return false;
11349	    }
11350	}
11351
11352      /* Requires a lib call.  */
11353      *total = COSTS_N_INSNS (20);
11354      return false;
11355
11356    default:
11357      return arm_rtx_costs_1 (x, outer_code, total, speed);
11358    }
11359}
11360
11361
11362/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
11363   so it can be ignored.  */
11364
11365static bool
11366arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11367		      int *total, bool speed)
11368{
11369  machine_mode mode = GET_MODE (x);
11370
11371  if (TARGET_THUMB)
11372    {
11373      *total = thumb1_rtx_costs (x, code, outer_code);
11374      return true;
11375    }
11376
11377  switch (code)
11378    {
11379    case COMPARE:
11380      if (GET_CODE (XEXP (x, 0)) != MULT)
11381	return arm_rtx_costs_1 (x, outer_code, total, speed);
11382
11383      /* A COMPARE of a MULT is slow on XScale; the muls instruction
11384	 will stall until the multiplication is complete.  */
11385      *total = COSTS_N_INSNS (3);
11386      return false;
11387
11388    case MULT:
11389      /* There is no point basing this on the tuning, since it is always the
11390	 fast variant if it exists at all.  */
11391      if (mode == DImode
11392	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11393	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11394	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11395	{
11396	  *total = COSTS_N_INSNS (2);
11397	  return false;
11398	}
11399
11400
11401      if (mode == DImode)
11402	{
11403	  *total = COSTS_N_INSNS (5);
11404	  return false;
11405	}
11406
11407      if (CONST_INT_P (XEXP (x, 1)))
11408	{
11409	  /* If operand 1 is a constant we can more accurately
11410	     calculate the cost of the multiply.  The multiplier can
11411	     retire 15 bits on the first cycle and a further 12 on the
11412	     second.  We do, of course, have to load the constant into
11413	     a register first.  */
11414	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11415	  /* There's a general overhead of one cycle.  */
11416	  int cost = 1;
11417	  unsigned HOST_WIDE_INT masked_const;
11418
11419	  if (i & 0x80000000)
11420	    i = ~i;
11421
11422	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11423
11424	  masked_const = i & 0xffff8000;
11425	  if (masked_const != 0)
11426	    {
11427	      cost++;
11428	      masked_const = i & 0xf8000000;
11429	      if (masked_const != 0)
11430		cost++;
11431	    }
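	  /* e.g. for 0x12345: i & 0xffff8000 is non-zero but
	     i & 0xf8000000 is zero, so the total here is 2 cycles.  */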
11432	  *total = COSTS_N_INSNS (cost);
11433	  return false;
11434	}
11435
11436      if (mode == SImode)
11437	{
11438	  *total = COSTS_N_INSNS (3);
11439	  return false;
11440	}
11441
11442      /* Requires a lib call.  */
11443      *total = COSTS_N_INSNS (20);
11444      return false;
11445
11446    default:
11447      return arm_rtx_costs_1 (x, outer_code, total, speed);
11448    }
11449}
11450
11451
11452/* RTX costs for 9e (and later) cores.  */
11453
11454static bool
11455arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11456		  int *total, bool speed)
11457{
11458  machine_mode mode = GET_MODE (x);
11459
11460  if (TARGET_THUMB1)
11461    {
11462      switch (code)
11463	{
11464	case MULT:
11465	  /* Small multiply: 32 cycles for an integer multiply inst.  */
11466	  if (arm_arch6m && arm_m_profile_small_mul)
11467	    *total = COSTS_N_INSNS (32);
11468	  else
11469	    *total = COSTS_N_INSNS (3);
11470	  return true;
11471
11472	default:
11473	  *total = thumb1_rtx_costs (x, code, outer_code);
11474	  return true;
11475	}
11476    }
11477
11478  switch (code)
11479    {
11480    case MULT:
11481      /* There is no point basing this on the tuning, since it is always the
11482	 fast variant if it exists at all.  */
11483      if (mode == DImode
11484	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11485	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11486	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11487	{
11488	  *total = COSTS_N_INSNS (2);
11489	  return false;
11490	}
11491
11492
11493      if (mode == DImode)
11494	{
11495	  *total = COSTS_N_INSNS (5);
11496	  return false;
11497	}
11498
11499      if (mode == SImode)
11500	{
11501	  *total = COSTS_N_INSNS (2);
11502	  return false;
11503	}
11504
11505      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11506	{
11507	  if (TARGET_HARD_FLOAT
11508	      && (mode == SFmode
11509		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
11510	    {
11511	      *total = COSTS_N_INSNS (1);
11512	      return false;
11513	    }
11514	}
11515
11516      *total = COSTS_N_INSNS (20);
11517      return false;
11518
11519    default:
11520      return arm_rtx_costs_1 (x, outer_code, total, speed);
11521    }
11522}
11523/* All supported address computations are essentially free, but rtx cost
11524   returns much the same value for practically all of them.  So we weight
11525   the different kinds of address here, most preferred first:
11526   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
11527static inline int
11528arm_arm_address_cost (rtx x)
11529{
11530  enum rtx_code c  = GET_CODE (x);
11531
11532  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11533    return 0;
11534  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11535    return 10;
11536
11537  if (c == PLUS)
11538    {
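      /* e.g. (plus reg (const_int 4)) => 2, (plus (mult reg 4) reg) => 3,
	 (plus reg reg) => 4.  */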
11539      if (CONST_INT_P (XEXP (x, 1)))
11540	return 2;
11541
11542      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11543	return 3;
11544
11545      return 4;
11546    }
11547
11548  return 6;
11549}
11550
11551static inline int
11552arm_thumb_address_cost (rtx x)
11553{
11554  enum rtx_code c  = GET_CODE (x);
11555
11556  if (c == REG)
11557    return 1;
11558  if (c == PLUS
11559      && REG_P (XEXP (x, 0))
11560      && CONST_INT_P (XEXP (x, 1)))
11561    return 1;
11562
11563  return 2;
11564}
11565
11566static int
11567arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11568		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11569{
11570  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11571}
11572
11573/* Adjust cost hook for XScale.  */
11574static bool
11575xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11576{
11577  /* Some true dependencies can have a higher cost depending
11578     on precisely how certain input operands are used.  */
11579  if (REG_NOTE_KIND (link) == 0
11580      && recog_memoized (insn) >= 0
11581      && recog_memoized (dep) >= 0)
11582    {
11583      int shift_opnum = get_attr_shift (insn);
11584      enum attr_type attr_type = get_attr_type (dep);
11585
11586      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11587	 operand for INSN.  If we have a shifted input operand and the
11588	 instruction we depend on is another ALU instruction, then we may
11589	 have to account for an additional stall.  */
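      /* For example, in ADD r0, r1, r2, LSL #2 the shifted input is r2;
	 if the insn we depend on produces r2 in an ALU operation, the
	 shifter needs it early and the dependency cost is raised below.  */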
11590      if (shift_opnum != 0
11591	  && (attr_type == TYPE_ALU_SHIFT_IMM
11592	      || attr_type == TYPE_ALUS_SHIFT_IMM
11593	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11594	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11595	      || attr_type == TYPE_ALU_SHIFT_REG
11596	      || attr_type == TYPE_ALUS_SHIFT_REG
11597	      || attr_type == TYPE_LOGIC_SHIFT_REG
11598	      || attr_type == TYPE_LOGICS_SHIFT_REG
11599	      || attr_type == TYPE_MOV_SHIFT
11600	      || attr_type == TYPE_MVN_SHIFT
11601	      || attr_type == TYPE_MOV_SHIFT_REG
11602	      || attr_type == TYPE_MVN_SHIFT_REG))
11603	{
11604	  rtx shifted_operand;
11605	  int opno;
11606
11607	  /* Get the shifted operand.  */
11608	  extract_insn (insn);
11609	  shifted_operand = recog_data.operand[shift_opnum];
11610
11611	  /* Iterate over all the operands in DEP.  If we write an operand
11612	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11613	     cost of this dependency.  */
11614	  extract_insn (dep);
11615	  preprocess_constraints (dep);
11616	  for (opno = 0; opno < recog_data.n_operands; opno++)
11617	    {
11618	      /* We can ignore strict inputs.  */
11619	      if (recog_data.operand_type[opno] == OP_IN)
11620		continue;
11621
11622	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11623					   shifted_operand))
11624		{
11625		  *cost = 2;
11626		  return false;
11627		}
11628	    }
11629	}
11630    }
11631  return true;
11632}
11633
11634/* Adjust cost hook for Cortex A9.  */
11635static bool
11636cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11637{
11638  switch (REG_NOTE_KIND (link))
11639    {
11640    case REG_DEP_ANTI:
11641      *cost = 0;
11642      return false;
11643
11644    case REG_DEP_TRUE:
11645    case REG_DEP_OUTPUT:
11646	if (recog_memoized (insn) >= 0
11647	    && recog_memoized (dep) >= 0)
11648	  {
11649	    if (GET_CODE (PATTERN (insn)) == SET)
11650	      {
11651		if (GET_MODE_CLASS
11652		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11653		  || GET_MODE_CLASS
11654		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11655		  {
11656		    enum attr_type attr_type_insn = get_attr_type (insn);
11657		    enum attr_type attr_type_dep = get_attr_type (dep);
11658
11659		    /* By default all dependencies of the form
11660		       s0 = s0 <op> s1
11661		       s0 = s0 <op> s2
11662		       have an extra latency of 1 cycle because
11663		       of the input and output dependency in this
11664		       case.  However this gets modeled as a true
11665		       dependency and hence all these checks.  */
11666		    if (REG_P (SET_DEST (PATTERN (insn)))
11667			&& REG_P (SET_DEST (PATTERN (dep)))
11668			&& reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11669						    SET_DEST (PATTERN (dep))))
11670		      {
11671			/* FMACS is a special case where the dependent
11672			   instruction can be issued 3 cycles before
11673			   the normal latency in case of an output
11674			   dependency.  */
11675			if ((attr_type_insn == TYPE_FMACS
11676			     || attr_type_insn == TYPE_FMACD)
11677			    && (attr_type_dep == TYPE_FMACS
11678				|| attr_type_dep == TYPE_FMACD))
11679			  {
11680			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11681			      *cost = insn_default_latency (dep) - 3;
11682			    else
11683			      *cost = insn_default_latency (dep);
11684			    return false;
11685			  }
11686			else
11687			  {
11688			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11689			      *cost = insn_default_latency (dep) + 1;
11690			    else
11691			      *cost = insn_default_latency (dep);
11692			  }
11693			return false;
11694		      }
11695		  }
11696	      }
11697	  }
11698	break;
11699
11700    default:
11701      gcc_unreachable ();
11702    }
11703
11704  return true;
11705}
11706
11707/* Adjust cost hook for FA726TE.  */
11708static bool
11709fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11710{
11711  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn
11712     followed by a predicated one) has a penalty of 3.  */
11713  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11714      && recog_memoized (insn) >= 0
11715      && recog_memoized (dep) >= 0
11716      && get_attr_conds (dep) == CONDS_SET)
11717    {
11718      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
11719      if (get_attr_conds (insn) == CONDS_USE
11720          && get_attr_type (insn) != TYPE_BRANCH)
11721        {
11722          *cost = 3;
11723          return false;
11724        }
11725
11726      if (GET_CODE (PATTERN (insn)) == COND_EXEC
11727          || get_attr_conds (insn) == CONDS_USE)
11728        {
11729          *cost = 0;
11730          return false;
11731        }
11732    }
11733
11734  return true;
11735}
11736
11737/* Implement TARGET_REGISTER_MOVE_COST.
11738
11739   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11740   such a move is typically more expensive than a single memory access.
11741   We set the cost to less than that of two memory accesses so that
11742   floating point to integer conversion does not go through memory.  */
11743
11744int
11745arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11746			reg_class_t from, reg_class_t to)
11747{
11748  if (TARGET_32BIT)
11749    {
11750      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11751	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11752	return 15;
11753      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11754	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11755	return 4;
11756      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11757	return 20;
11758      else
11759	return 2;
11760    }
11761  else
11762    {
11763      if (from == HI_REGS || to == HI_REGS)
11764	return 4;
11765      else
11766	return 2;
11767    }
11768}
11769
11770/* Implement TARGET_MEMORY_MOVE_COST.  */
11771
11772int
11773arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11774		      bool in ATTRIBUTE_UNUSED)
11775{
11776  if (TARGET_32BIT)
11777    return 10;
11778  else
11779    {
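      /* Thumb-1: sub-word accesses are costed at a flat 8; otherwise two
	 units per byte of the mode, doubled outside LO_REGS (e.g. SImode:
	 8 in LO_REGS, 16 elsewhere).  */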
11780      if (GET_MODE_SIZE (mode) < 4)
11781	return 8;
11782      else
11783	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11784    }
11785}
11786
11787/* Vectorizer cost model implementation.  */
11788
11789/* Implement targetm.vectorize.builtin_vectorization_cost.  */
11790static int
11791arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11792				tree vectype,
11793				int misalign ATTRIBUTE_UNUSED)
11794{
11795  unsigned elements;
11796
11797  switch (type_of_cost)
11798    {
11799      case scalar_stmt:
11800        return current_tune->vec_costs->scalar_stmt_cost;
11801
11802      case scalar_load:
11803        return current_tune->vec_costs->scalar_load_cost;
11804
11805      case scalar_store:
11806        return current_tune->vec_costs->scalar_store_cost;
11807
11808      case vector_stmt:
11809        return current_tune->vec_costs->vec_stmt_cost;
11810
11811      case vector_load:
11812        return current_tune->vec_costs->vec_align_load_cost;
11813
11814      case vector_store:
11815        return current_tune->vec_costs->vec_store_cost;
11816
11817      case vec_to_scalar:
11818        return current_tune->vec_costs->vec_to_scalar_cost;
11819
11820      case scalar_to_vec:
11821        return current_tune->vec_costs->scalar_to_vec_cost;
11822
11823      case unaligned_load:
11824        return current_tune->vec_costs->vec_unalign_load_cost;
11825
11826      case unaligned_store:
11827        return current_tune->vec_costs->vec_unalign_store_cost;
11828
11829      case cond_branch_taken:
11830        return current_tune->vec_costs->cond_taken_branch_cost;
11831
11832      case cond_branch_not_taken:
11833        return current_tune->vec_costs->cond_not_taken_branch_cost;
11834
11835      case vec_perm:
11836      case vec_promote_demote:
11837        return current_tune->vec_costs->vec_stmt_cost;
11838
11839      case vec_construct:
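	/* A rough, ad-hoc estimate: one unit per two elements plus one,
	   e.g. 3 for a four-element vector.  */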
11840	elements = TYPE_VECTOR_SUBPARTS (vectype);
11841	return elements / 2 + 1;
11842
11843      default:
11844        gcc_unreachable ();
11845    }
11846}
11847
11848/* Implement targetm.vectorize.add_stmt_cost.  */
11849
11850static unsigned
11851arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11852		   struct _stmt_vec_info *stmt_info, int misalign,
11853		   enum vect_cost_model_location where)
11854{
11855  unsigned *cost = (unsigned *) data;
11856  unsigned retval = 0;
11857
11858  if (flag_vect_cost_model)
11859    {
11860      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11861      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11862
11863      /* Statements in an inner loop relative to the loop being
11864	 vectorized are weighted more heavily.  The value here is
11865	 arbitrary and could potentially be improved with analysis.  */
11866      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11867	count *= 50;  /* FIXME.  */
11868
11869      retval = (unsigned) (count * stmt_cost);
11870      cost[where] += retval;
11871    }
11872
11873  return retval;
11874}
11875
11876/* Return true if and only if this insn can dual-issue only as older.  */
11877static bool
11878cortexa7_older_only (rtx_insn *insn)
11879{
11880  if (recog_memoized (insn) < 0)
11881    return false;
11882
11883  switch (get_attr_type (insn))
11884    {
11885    case TYPE_ALU_DSP_REG:
11886    case TYPE_ALU_SREG:
11887    case TYPE_ALUS_SREG:
11888    case TYPE_LOGIC_REG:
11889    case TYPE_LOGICS_REG:
11890    case TYPE_ADC_REG:
11891    case TYPE_ADCS_REG:
11892    case TYPE_ADR:
11893    case TYPE_BFM:
11894    case TYPE_REV:
11895    case TYPE_MVN_REG:
11896    case TYPE_SHIFT_IMM:
11897    case TYPE_SHIFT_REG:
11898    case TYPE_LOAD_BYTE:
11899    case TYPE_LOAD1:
11900    case TYPE_STORE1:
11901    case TYPE_FFARITHS:
11902    case TYPE_FADDS:
11903    case TYPE_FFARITHD:
11904    case TYPE_FADDD:
11905    case TYPE_FMOV:
11906    case TYPE_F_CVT:
11907    case TYPE_FCMPS:
11908    case TYPE_FCMPD:
11909    case TYPE_FCONSTS:
11910    case TYPE_FCONSTD:
11911    case TYPE_FMULS:
11912    case TYPE_FMACS:
11913    case TYPE_FMULD:
11914    case TYPE_FMACD:
11915    case TYPE_FDIVS:
11916    case TYPE_FDIVD:
11917    case TYPE_F_MRC:
11918    case TYPE_F_MRRC:
11919    case TYPE_F_FLAG:
11920    case TYPE_F_LOADS:
11921    case TYPE_F_STORES:
11922      return true;
11923    default:
11924      return false;
11925    }
11926}
11927
11928/* Return true if and only if this insn can dual-issue as younger.  */
11929static bool
11930cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11931{
11932  if (recog_memoized (insn) < 0)
11933    {
11934      if (verbose > 5)
11935        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11936      return false;
11937    }
11938
11939  switch (get_attr_type (insn))
11940    {
11941    case TYPE_ALU_IMM:
11942    case TYPE_ALUS_IMM:
11943    case TYPE_LOGIC_IMM:
11944    case TYPE_LOGICS_IMM:
11945    case TYPE_EXTEND:
11946    case TYPE_MVN_IMM:
11947    case TYPE_MOV_IMM:
11948    case TYPE_MOV_REG:
11949    case TYPE_MOV_SHIFT:
11950    case TYPE_MOV_SHIFT_REG:
11951    case TYPE_BRANCH:
11952    case TYPE_CALL:
11953      return true;
11954    default:
11955      return false;
11956    }
11957}
11958
11959
11960/* Look for an instruction that can dual issue only as an older
11961   instruction, and move it in front of any instructions that can
11962   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
11964   dual-issue in later cycles, by postponing issue of more flexible
11965   instructions.  This heuristic may affect dual issue opportunities
11966   in the current cycle.  */
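/* For example, if the next two insns in issue order are a mov-immediate
   (which can issue as younger) and an ALU register-register operation
   (older only), the ALU operation is pulled ahead of the mov, leaving the
   mov available to fill the younger slot of a later dual-issue pair.  */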
11967static void
11968cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11969			int *n_readyp, int clock)
11970{
11971  int i;
11972  int first_older_only = -1, first_younger = -1;
11973
11974  if (verbose > 5)
11975    fprintf (file,
11976             ";; sched_reorder for cycle %d with %d insns in ready list\n",
11977             clock,
11978             *n_readyp);
11979
11980  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
11982     younger and the first instruction that can dual-issue only as
11983     older.  */
11984  for (i = *n_readyp - 1; i >= 0; i--)
11985    {
11986      rtx_insn *insn = ready[i];
11987      if (cortexa7_older_only (insn))
11988        {
11989          first_older_only = i;
11990          if (verbose > 5)
11991            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11992          break;
11993        }
11994      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11995        first_younger = i;
11996    }
11997
11998  /* Nothing to reorder because either no younger insn found or insn
11999     that can dual-issue only as older appears before any insn that
12000     can dual-issue as younger.  */
12001  if (first_younger == -1)
12002    {
12003      if (verbose > 5)
12004        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12005      return;
12006    }
12007
12008  /* Nothing to reorder because no older-only insn in the ready list.  */
12009  if (first_older_only == -1)
12010    {
12011      if (verbose > 5)
12012        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12013      return;
12014    }
12015
12016  /* Move first_older_only insn before first_younger.  */
12017  if (verbose > 5)
12018    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12019             INSN_UID(ready [first_older_only]),
12020             INSN_UID(ready [first_younger]));
12021  rtx_insn *first_older_only_insn = ready [first_older_only];
12022  for (i = first_older_only; i < first_younger; i++)
12023    {
12024      ready[i] = ready[i+1];
12025    }
12026
12027  ready[i] = first_older_only_insn;
12028  return;
12029}
12030
12031/* Implement TARGET_SCHED_REORDER. */
12032static int
12033arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12034                   int clock)
12035{
12036  switch (arm_tune)
12037    {
12038    case cortexa7:
12039      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12040      break;
12041    default:
12042      /* Do nothing for other cores.  */
12043      break;
12044    }
12045
12046  return arm_issue_rate ();
12047}
12048
/* This function implements the target hook TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK, and returns the new
   value.  A per-core sched_adjust_cost hook may adjust scheduler costs
   further and can choose to completely override this generic function.
   Only put code that is common across all cores into arm_adjust_cost.  */
12056static int
12057arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12058{
12059  rtx i_pat, d_pat;
12060
12061 /* When generating Thumb-1 code, we want to place flag-setting operations
12062    close to a conditional branch which depends on them, so that we can
12063    omit the comparison. */
12064  if (TARGET_THUMB1
12065      && REG_NOTE_KIND (link) == 0
12066      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12067      && recog_memoized (dep) >= 0
12068      && get_attr_conds (dep) == CONDS_SET)
12069    return 0;
12070
12071  if (current_tune->sched_adjust_cost != NULL)
12072    {
12073      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12074	return cost;
12075    }
12076
12077  /* XXX Is this strictly true?  */
12078  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12079      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12080    return 0;
12081
12082  /* Call insns don't incur a stall, even if they follow a load.  */
12083  if (REG_NOTE_KIND (link) == 0
12084      && CALL_P (insn))
12085    return 1;
12086
12087  if ((i_pat = single_set (insn)) != NULL
12088      && MEM_P (SET_SRC (i_pat))
12089      && (d_pat = single_set (dep)) != NULL
12090      && MEM_P (SET_DEST (d_pat)))
12091    {
12092      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12093      /* This is a load after a store, there is no conflict if the load reads
12094	 from a cached area.  Assume that loads from the stack, and from the
12095	 constant pool are cached, and that others will miss.  This is a
12096	 hack.  */
12097
12098      if ((GET_CODE (src_mem) == SYMBOL_REF
12099	   && CONSTANT_POOL_ADDRESS_P (src_mem))
12100	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
12101	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
12102	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12103	return 1;
12104    }
12105
12106  return cost;
12107}
12108
12109int
12110arm_max_conditional_execute (void)
12111{
12112  return max_insns_skipped;
12113}
12114
12115static int
12116arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12117{
12118  if (TARGET_32BIT)
12119    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12120  else
12121    return (optimize > 0) ? 2 : 0;
12122}
12123
12124static int
12125arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12126{
12127  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12128}
12129
12130/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12131   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12132   sequences of non-executed instructions in IT blocks probably take the same
12133   amount of time as executed instructions (and the IT instruction itself takes
12134   space in icache).  This function was experimentally determined to give good
12135   results on a popular embedded benchmark.  */
12136
12137static int
12138arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12139{
12140  return (TARGET_32BIT && speed_p) ? 1
12141         : arm_default_branch_cost (speed_p, predictable_p);
12142}
12143
12144static int
12145arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12146{
12147  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12148}
12149
12150static bool fp_consts_inited = false;
12151
12152static REAL_VALUE_TYPE value_fp0;
12153
12154static void
12155init_fp_table (void)
12156{
12157  REAL_VALUE_TYPE r;
12158
12159  r = REAL_VALUE_ATOF ("0", DFmode);
12160  value_fp0 = r;
12161  fp_consts_inited = true;
12162}
12163
12164/* Return TRUE if rtx X is a valid immediate FP constant.  */
12165int
12166arm_const_double_rtx (rtx x)
12167{
12168  REAL_VALUE_TYPE r;
12169
12170  if (!fp_consts_inited)
12171    init_fp_table ();
12172
12173  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12174  if (REAL_VALUE_MINUS_ZERO (r))
12175    return 0;
12176
12177  if (REAL_VALUES_EQUAL (r, value_fp0))
12178    return 1;
12179
12180  return 0;
12181}
12182
12183/* VFPv3 has a fairly wide range of representable immediates, formed from
12184   "quarter-precision" floating-point values. These can be evaluated using this
12185   formula (with ^ for exponentiation):
12186
     (-1)^s * n * 2^(-r)
12188
12189   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12190   16 <= n <= 31 and 0 <= r <= 7.
12191
12192   These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12193
12194     - A (most-significant) is the sign bit.
12195     - BCD are the exponent (encoded as r XOR 3).
12196     - EFGH are the mantissa (encoded as n - 16).
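
   For example, 1.0 = 16 * 2^(-4), so s = 0, n = 16, r = 4, giving the
   encoding 0 111 0000 (0x70); similarly 0.5 = 16 * 2^(-5) encodes as 0x60.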
12197*/
12198
12199/* Return an integer index for a VFPv3 immediate operand X suitable for the
12200   fconst[sd] instruction, or -1 if X isn't suitable.  */
12201static int
12202vfp3_const_double_index (rtx x)
12203{
12204  REAL_VALUE_TYPE r, m;
12205  int sign, exponent;
12206  unsigned HOST_WIDE_INT mantissa, mant_hi;
12207  unsigned HOST_WIDE_INT mask;
12208  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12209  bool fail;
12210
12211  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12212    return -1;
12213
12214  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12215
12216  /* We can't represent these things, so detect them first.  */
12217  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12218    return -1;
12219
12220  /* Extract sign, exponent and mantissa.  */
12221  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12222  r = real_value_abs (&r);
12223  exponent = REAL_EXP (&r);
12224  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12225     highest (sign) bit, with a fixed binary point at bit point_pos.
12226     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12227     bits for the mantissa, this may fail (low bits would be lost).  */
12228  real_ldexp (&m, &r, point_pos - exponent);
12229  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12230  mantissa = w.elt (0);
12231  mant_hi = w.elt (1);
12232
12233  /* If there are bits set in the low part of the mantissa, we can't
12234     represent this value.  */
12235  if (mantissa != 0)
12236    return -1;
12237
12238  /* Now make it so that mantissa contains the most-significant bits, and move
12239     the point_pos to indicate that the least-significant bits have been
12240     discarded.  */
12241  point_pos -= HOST_BITS_PER_WIDE_INT;
12242  mantissa = mant_hi;
12243
12244  /* We can permit four significant bits of mantissa only, plus a high bit
12245     which is always 1.  */
12246  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12247  if ((mantissa & mask) != 0)
12248    return -1;
12249
12250  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
12251  mantissa >>= point_pos - 5;
12252
12253  /* The mantissa may be zero. Disallow that case. (It's possible to load the
12254     floating-point immediate zero with Neon using an integer-zero load, but
12255     that case is handled elsewhere.)  */
12256  if (mantissa == 0)
12257    return -1;
12258
12259  gcc_assert (mantissa >= 16 && mantissa <= 31);
12260
12261  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12262     normalized significands are in the range [1, 2). (Our mantissa is shifted
12263     left 4 places at this point relative to normalized IEEE754 values).  GCC
12264     internally uses [0.5, 1) (see real.c), so the exponent returned from
12265     REAL_EXP must be altered.  */
12266  exponent = 5 - exponent;
12267
12268  if (exponent < 0 || exponent > 7)
12269    return -1;
12270
12271  /* Sign, mantissa and exponent are now in the correct form to plug into the
12272     formula described in the comment above.  */
12273  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12274}
12275
12276/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
12277int
12278vfp3_const_double_rtx (rtx x)
12279{
12280  if (!TARGET_VFP3)
12281    return 0;
12282
12283  return vfp3_const_double_index (x) != -1;
12284}
12285
12286/* Recognize immediates which can be used in various Neon instructions. Legal
12287   immediates are described by the following table (for VMVN variants, the
12288   bitwise inverse of the constant shown is recognized. In either case, VMOV
12289   is output and the correct instruction to use for a given constant is chosen
12290   by the assembler). The constant shown is replicated across all elements of
12291   the destination vector.
12292
12293   insn elems variant constant (binary)
12294   ---- ----- ------- -----------------
12295   vmov  i32     0    00000000 00000000 00000000 abcdefgh
12296   vmov  i32     1    00000000 00000000 abcdefgh 00000000
12297   vmov  i32     2    00000000 abcdefgh 00000000 00000000
12298   vmov  i32     3    abcdefgh 00000000 00000000 00000000
12299   vmov  i16     4    00000000 abcdefgh
12300   vmov  i16     5    abcdefgh 00000000
12301   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
12302   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
12303   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
12304   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
12305   vmvn  i16    10    00000000 abcdefgh
12306   vmvn  i16    11    abcdefgh 00000000
12307   vmov  i32    12    00000000 00000000 abcdefgh 11111111
12308   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
12309   vmov  i32    14    00000000 abcdefgh 11111111 11111111
12310   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
12311   vmov   i8    16    abcdefgh
12312   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
12313                      eeeeeeee ffffffff gggggggg hhhhhhhh
12314   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
12315   vmov  f32    19    00000000 00000000 00000000 00000000
12316
12317   For case 18, B = !b. Representable values are exactly those accepted by
12318   vfp3_const_double_index, but are output as floating-point numbers rather
12319   than indices.
12320
12321   For case 19, we will change it to vmov.i32 when assembling.
12322
12323   Variants 0-5 (inclusive) may also be used as immediates for the second
12324   operand of VORR/VBIC instructions.
12325
12326   The INVERSE argument causes the bitwise inverse of the given operand to be
12327   recognized instead (used for recognizing legal immediates for the VAND/VORN
12328   pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12329   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12330   output, rather than the real insns vbic/vorr).
12331
12332   INVERSE makes no difference to the recognition of float vectors.
12333
12334   The return value is the variant of immediate as shown in the above table, or
12335   -1 if the given value doesn't match any of the listed patterns.
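
   For example, a V4SImode vector with every element equal to 0x00000041
   matches variant 0 with element width 32, while one with every element
   equal to 0xffffff41 matches variant 6 (a VMVN form; its bitwise inverse
   0x000000be fits the pattern shown above).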
12336*/
12337static int
12338neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12339		      rtx *modconst, int *elementwidth)
12340{
12341#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
12342  matches = 1;					\
12343  for (i = 0; i < idx; i += (STRIDE))		\
12344    if (!(TEST))				\
12345      matches = 0;				\
12346  if (matches)					\
12347    {						\
12348      immtype = (CLASS);			\
12349      elsize = (ELSIZE);			\
12350      break;					\
12351    }
12352
12353  unsigned int i, elsize = 0, idx = 0, n_elts;
12354  unsigned int innersize;
12355  unsigned char bytes[16];
12356  int immtype = -1, matches;
12357  unsigned int invmask = inverse ? 0xff : 0;
12358  bool vector = GET_CODE (op) == CONST_VECTOR;
12359
12360  if (vector)
12361    {
12362      n_elts = CONST_VECTOR_NUNITS (op);
12363      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12364    }
12365  else
12366    {
12367      n_elts = 1;
12368      if (mode == VOIDmode)
12369	mode = DImode;
12370      innersize = GET_MODE_SIZE (mode);
12371    }
12372
12373  /* Vectors of float constants.  */
12374  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12375    {
12376      rtx el0 = CONST_VECTOR_ELT (op, 0);
12377      REAL_VALUE_TYPE r0;
12378
12379      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12380        return -1;
12381
12382      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12383
12384      for (i = 1; i < n_elts; i++)
12385        {
12386          rtx elt = CONST_VECTOR_ELT (op, i);
12387          REAL_VALUE_TYPE re;
12388
12389          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12390
12391          if (!REAL_VALUES_EQUAL (r0, re))
12392            return -1;
12393        }
12394
12395      if (modconst)
12396        *modconst = CONST_VECTOR_ELT (op, 0);
12397
12398      if (elementwidth)
12399        *elementwidth = 0;
12400
12401      if (el0 == CONST0_RTX (GET_MODE (el0)))
12402	return 19;
12403      else
12404	return 18;
12405    }
12406
12407  /* Splat vector constant out into a byte vector.  */
12408  for (i = 0; i < n_elts; i++)
12409    {
12410      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12411      unsigned HOST_WIDE_INT elpart;
12412      unsigned int part, parts;
12413
12414      if (CONST_INT_P (el))
12415        {
12416          elpart = INTVAL (el);
12417          parts = 1;
12418        }
12419      else if (CONST_DOUBLE_P (el))
12420        {
12421          elpart = CONST_DOUBLE_LOW (el);
12422          parts = 2;
12423        }
12424      else
12425        gcc_unreachable ();
12426
12427      for (part = 0; part < parts; part++)
12428        {
12429          unsigned int byte;
12430          for (byte = 0; byte < innersize; byte++)
12431            {
12432              bytes[idx++] = (elpart & 0xff) ^ invmask;
12433              elpart >>= BITS_PER_UNIT;
12434            }
12435          if (CONST_DOUBLE_P (el))
12436            elpart = CONST_DOUBLE_HIGH (el);
12437        }
12438    }
12439
12440  /* Sanity check.  */
12441  gcc_assert (idx == GET_MODE_SIZE (mode));
12442
12443  do
12444    {
12445      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12446		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12447
12448      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12449		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12450
12451      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12452		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12453
12454      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12455		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12456
12457      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12458
12459      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12460
12461      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12462		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12463
12464      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12465		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12466
12467      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12468		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12469
12470      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12471		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12472
12473      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12474
12475      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12476
12477      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12478			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12479
12480      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12481			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12482
12483      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12484			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12485
12486      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12487			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12488
12489      CHECK (1, 8, 16, bytes[i] == bytes[0]);
12490
12491      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12492			&& bytes[i] == bytes[(i + 8) % idx]);
12493    }
12494  while (0);
12495
12496  if (immtype == -1)
12497    return -1;
12498
12499  if (elementwidth)
12500    *elementwidth = elsize;
12501
12502  if (modconst)
12503    {
12504      unsigned HOST_WIDE_INT imm = 0;
12505
12506      /* Un-invert bytes of recognized vector, if necessary.  */
12507      if (invmask != 0)
12508        for (i = 0; i < idx; i++)
12509          bytes[i] ^= invmask;
12510
12511      if (immtype == 17)
12512        {
12513          /* FIXME: Broken on 32-bit H_W_I hosts.  */
12514          gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12515
12516          for (i = 0; i < 8; i++)
12517            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12518                   << (i * BITS_PER_UNIT);
12519
12520          *modconst = GEN_INT (imm);
12521        }
12522      else
12523        {
12524          unsigned HOST_WIDE_INT imm = 0;
12525
12526          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12527            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12528
12529          *modconst = GEN_INT (imm);
12530        }
12531    }
12532
12533  return immtype;
12534#undef CHECK
12535}
12536
/* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
12538   VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12539   float elements), and a modified constant (whatever should be output for a
12540   VMOV) in *MODCONST.  */
12541
12542int
12543neon_immediate_valid_for_move (rtx op, machine_mode mode,
12544			       rtx *modconst, int *elementwidth)
12545{
12546  rtx tmpconst;
12547  int tmpwidth;
12548  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12549
12550  if (retval == -1)
12551    return 0;
12552
12553  if (modconst)
12554    *modconst = tmpconst;
12555
12556  if (elementwidth)
12557    *elementwidth = tmpwidth;
12558
12559  return 1;
12560}
12561
/* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
12563   the immediate is valid, write a constant suitable for using as an operand
12564   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12565   *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
12566
12567int
12568neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12569				rtx *modconst, int *elementwidth)
12570{
12571  rtx tmpconst;
12572  int tmpwidth;
12573  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12574
12575  if (retval < 0 || retval > 5)
12576    return 0;
12577
12578  if (modconst)
12579    *modconst = tmpconst;
12580
12581  if (elementwidth)
12582    *elementwidth = tmpwidth;
12583
12584  return 1;
12585}
12586
12587/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12588   the immediate is valid, write a constant suitable for using as an operand
12589   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT is true for a left shift and false for a
   right shift; the two have different limits on the immediate.  */
12592
12593int
12594neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12595				rtx *modconst, int *elementwidth,
12596				bool isleftshift)
12597{
12598  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12599  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12600  unsigned HOST_WIDE_INT last_elt = 0;
12601  unsigned HOST_WIDE_INT maxshift;
12602
  /* All elements must be equal CONST_INTs; the common value is the
     shift count.  */
12604  for (i = 0; i < n_elts; i++)
12605    {
12606      rtx el = CONST_VECTOR_ELT (op, i);
12607      unsigned HOST_WIDE_INT elpart;
12608
12609      if (CONST_INT_P (el))
12610        elpart = INTVAL (el);
12611      else if (CONST_DOUBLE_P (el))
12612        return 0;
12613      else
12614        gcc_unreachable ();
12615
12616      if (i != 0 && elpart != last_elt)
12617        return 0;
12618
12619      last_elt = elpart;
12620    }
12621
12622  /* Shift less than element size.  */
12623  maxshift = innersize * 8;
12624
12625  if (isleftshift)
12626    {
12627      /* Left shift immediate value can be from 0 to <size>-1.  */
12628      if (last_elt >= maxshift)
12629        return 0;
12630    }
12631  else
12632    {
12633      /* Right shift immediate value can be from 1 to <size>.  */
12634      if (last_elt == 0 || last_elt > maxshift)
12635	return 0;
12636    }
12637
12638  if (elementwidth)
12639    *elementwidth = innersize * 8;
12640
12641  if (modconst)
12642    *modconst = CONST_VECTOR_ELT (op, 0);
12643
12644  return 1;
12645}
12646
12647/* Return a string suitable for output of Neon immediate logic operation
12648   MNEM.  */
12649
12650char *
12651neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12652			     int inverse, int quad)
12653{
12654  int width, is_valid;
12655  static char templ[40];
12656
12657  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12658
12659  gcc_assert (is_valid != 0);
12660
12661  if (quad)
12662    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12663  else
12664    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12665
12666  return templ;
12667}
12668
12669/* Return a string suitable for output of Neon immediate shift operation
12670   (VSHR or VSHL) MNEM.  */
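/* For example, MNEM "vshr" with SIGN 's', a 32-bit element width and QUAD
   nonzero produces the template "vshr.s32\t%q0, %q1, %2".  */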
12671
12672char *
12673neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12674			     machine_mode mode, int quad,
12675			     bool isleftshift)
12676{
12677  int width, is_valid;
12678  static char templ[40];
12679
12680  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12681  gcc_assert (is_valid != 0);
12682
12683  if (quad)
12684    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12685  else
12686    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12687
12688  return templ;
12689}
12690
12691/* Output a sequence of pairwise operations to implement a reduction.
12692   NOTE: We do "too much work" here, because pairwise operations work on two
12693   registers-worth of operands in one go. Unfortunately we can't exploit those
12694   extra calculations to do the full operation in fewer steps, I don't think.
12695   Although all vector elements of the result but the first are ignored, we
12696   actually calculate the same result in each of the elements. An alternative
12697   such as initially loading a vector with zero to use as each of the second
12698   operands would use up an additional register and take an extra instruction,
12699   for no particular gain.  */
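/* For example, reducing a V4SF vector takes two pairwise steps: PARTS is 4,
   so the loop below runs with I = 2 and then I = 1, emitting REDUC twice.  */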
12700
12701void
12702neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12703		      rtx (*reduc) (rtx, rtx, rtx))
12704{
12705  machine_mode inner = GET_MODE_INNER (mode);
12706  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12707  rtx tmpsum = op1;
12708
12709  for (i = parts / 2; i >= 1; i /= 2)
12710    {
12711      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12712      emit_insn (reduc (dest, tmpsum, tmpsum));
12713      tmpsum = dest;
12714    }
12715}
12716
12717/* If VALS is a vector constant that can be loaded into a register
12718   using VDUP, generate instructions to do so and return an RTX to
12719   assign to the register.  Otherwise return NULL_RTX.  */
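/* For example, a V4SImode CONST_VECTOR with all four elements equal to 5 is
   returned as (vec_duplicate:V4SI (reg:SI N)) after the constant 5 has been
   copied into a core register.  */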
12720
12721static rtx
12722neon_vdup_constant (rtx vals)
12723{
12724  machine_mode mode = GET_MODE (vals);
12725  machine_mode inner_mode = GET_MODE_INNER (mode);
12726  int n_elts = GET_MODE_NUNITS (mode);
12727  bool all_same = true;
12728  rtx x;
12729  int i;
12730
12731  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12732    return NULL_RTX;
12733
12734  for (i = 0; i < n_elts; ++i)
12735    {
12736      x = XVECEXP (vals, 0, i);
12737      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12738	all_same = false;
12739    }
12740
12741  if (!all_same)
12742    /* The elements are not all the same.  We could handle repeating
12743       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12744       {0, C, 0, C, 0, C, 0, C} which can be loaded using
12745       vdup.i16).  */
12746    return NULL_RTX;
12747
12748  /* We can load this constant by using VDUP and a constant in a
12749     single ARM register.  This will be cheaper than a vector
12750     load.  */
12751
12752  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12753  return gen_rtx_VEC_DUPLICATE (mode, x);
12754}
12755
12756/* Generate code to load VALS, which is a PARALLEL containing only
12757   constants (for vec_init) or CONST_VECTOR, efficiently into a
12758   register.  Returns an RTX to copy into the register, or NULL_RTX
12759   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
12760
12761rtx
12762neon_make_constant (rtx vals)
12763{
12764  machine_mode mode = GET_MODE (vals);
12765  rtx target;
12766  rtx const_vec = NULL_RTX;
12767  int n_elts = GET_MODE_NUNITS (mode);
12768  int n_const = 0;
12769  int i;
12770
12771  if (GET_CODE (vals) == CONST_VECTOR)
12772    const_vec = vals;
12773  else if (GET_CODE (vals) == PARALLEL)
12774    {
12775      /* A CONST_VECTOR must contain only CONST_INTs and
12776	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12777	 Only store valid constants in a CONST_VECTOR.  */
12778      for (i = 0; i < n_elts; ++i)
12779	{
12780	  rtx x = XVECEXP (vals, 0, i);
12781	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12782	    n_const++;
12783	}
12784      if (n_const == n_elts)
12785	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12786    }
12787  else
12788    gcc_unreachable ();
12789
12790  if (const_vec != NULL
12791      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12792    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
12793    return const_vec;
12794  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12795    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
12796       pipeline cycle; creating the constant takes one or two ARM
12797       pipeline cycles.  */
12798    return target;
12799  else if (const_vec != NULL_RTX)
12800    /* Load from constant pool.  On Cortex-A8 this takes two cycles
12801       (for either double or quad vectors).  We can not take advantage
12802       of single-cycle VLD1 because we need a PC-relative addressing
12803       mode.  */
12804    return const_vec;
12805  else
12806    /* A PARALLEL containing something not valid inside CONST_VECTOR.
12807       We can not construct an initializer.  */
12808    return NULL_RTX;
12809}
12810
12811/* Initialize vector TARGET to VALS.  */
12812
12813void
12814neon_expand_vector_init (rtx target, rtx vals)
12815{
12816  machine_mode mode = GET_MODE (target);
12817  machine_mode inner_mode = GET_MODE_INNER (mode);
12818  int n_elts = GET_MODE_NUNITS (mode);
12819  int n_var = 0, one_var = -1;
12820  bool all_same = true;
12821  rtx x, mem;
12822  int i;
12823
12824  for (i = 0; i < n_elts; ++i)
12825    {
12826      x = XVECEXP (vals, 0, i);
12827      if (!CONSTANT_P (x))
12828	++n_var, one_var = i;
12829
12830      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12831	all_same = false;
12832    }
12833
12834  if (n_var == 0)
12835    {
12836      rtx constant = neon_make_constant (vals);
12837      if (constant != NULL_RTX)
12838	{
12839	  emit_move_insn (target, constant);
12840	  return;
12841	}
12842    }
12843
12844  /* Splat a single non-constant element if we can.  */
12845  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12846    {
12847      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12848      emit_insn (gen_rtx_SET (VOIDmode, target,
12849			      gen_rtx_VEC_DUPLICATE (mode, x)));
12850      return;
12851    }
12852
12853  /* One field is non-constant.  Load constant then overwrite varying
12854     field.  This is more efficient than using the stack.  */
12855  if (n_var == 1)
12856    {
12857      rtx copy = copy_rtx (vals);
12858      rtx index = GEN_INT (one_var);
12859
12860      /* Load constant part of vector, substitute neighboring value for
12861	 varying element.  */
12862      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12863      neon_expand_vector_init (target, copy);
12864
12865      /* Insert variable.  */
12866      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12867      switch (mode)
12868	{
12869	case V8QImode:
12870	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12871	  break;
12872	case V16QImode:
12873	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12874	  break;
12875	case V4HImode:
12876	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12877	  break;
12878	case V8HImode:
12879	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12880	  break;
12881	case V2SImode:
12882	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12883	  break;
12884	case V4SImode:
12885	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12886	  break;
12887	case V2SFmode:
12888	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12889	  break;
12890	case V4SFmode:
12891	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12892	  break;
12893	case V2DImode:
12894	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12895	  break;
12896	default:
12897	  gcc_unreachable ();
12898	}
12899      return;
12900    }
12901
12902  /* Construct the vector in memory one field at a time
12903     and load the whole vector.  */
12904  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12905  for (i = 0; i < n_elts; i++)
12906    emit_move_insn (adjust_address_nv (mem, inner_mode,
12907				    i * GET_MODE_SIZE (inner_mode)),
12908		    XVECEXP (vals, 0, i));
12909  emit_move_insn (target, mem);
12910}
12911
12912/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
12913   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
12914   reported source locations are bogus.  */
12915
12916static void
12917bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12918	      const char *err)
12919{
12920  HOST_WIDE_INT lane;
12921
12922  gcc_assert (CONST_INT_P (operand));
12923
12924  lane = INTVAL (operand);
12925
12926  if (lane < low || lane >= high)
12927    error (err);
12928}
12929
12930/* Bounds-check lanes.  */
12931
12932void
12933neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12934{
12935  bounds_check (operand, low, high, "lane out of range");
12936}
12937
12938/* Bounds-check constants.  */
12939
12940void
12941neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12942{
12943  bounds_check (operand, low, high, "constant out of range");
12944}
12945
12946HOST_WIDE_INT
12947neon_element_bits (machine_mode mode)
12948{
12949  if (mode == DImode)
12950    return GET_MODE_BITSIZE (mode);
12951  else
12952    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12953}
12954
12955
12956/* Predicates for `match_operand' and `match_operator'.  */
12957
12958/* Return TRUE if OP is a valid coprocessor memory address pattern.
12959   WB is true if full writeback address modes are allowed and is false
12960   if limited writeback address modes (POST_INC and PRE_DEC) are
12961   allowed.  */
12962
12963int
12964arm_coproc_mem_operand (rtx op, bool wb)
12965{
12966  rtx ind;
12967
12968  /* Reject eliminable registers.  */
12969  if (! (reload_in_progress || reload_completed || lra_in_progress)
12970      && (   reg_mentioned_p (frame_pointer_rtx, op)
12971	  || reg_mentioned_p (arg_pointer_rtx, op)
12972	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12973	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12974	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12975	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12976    return FALSE;
12977
12978  /* Constants are converted into offsets from labels.  */
12979  if (!MEM_P (op))
12980    return FALSE;
12981
12982  ind = XEXP (op, 0);
12983
12984  if (reload_completed
12985      && (GET_CODE (ind) == LABEL_REF
12986	  || (GET_CODE (ind) == CONST
12987	      && GET_CODE (XEXP (ind, 0)) == PLUS
12988	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12989	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12990    return TRUE;
12991
12992  /* Match: (mem (reg)).  */
12993  if (REG_P (ind))
12994    return arm_address_register_rtx_p (ind, 0);
12995
  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12997     acceptable in any case (subject to verification by
12998     arm_address_register_rtx_p).  We need WB to be true to accept
12999     PRE_INC and POST_DEC.  */
13000  if (GET_CODE (ind) == POST_INC
13001      || GET_CODE (ind) == PRE_DEC
13002      || (wb
13003	  && (GET_CODE (ind) == PRE_INC
13004	      || GET_CODE (ind) == POST_DEC)))
13005    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13006
13007  if (wb
13008      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13009      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13010      && GET_CODE (XEXP (ind, 1)) == PLUS
13011      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13012    ind = XEXP (ind, 1);
13013
13014  /* Match:
13015     (plus (reg)
13016	   (const)).  */
13017  if (GET_CODE (ind) == PLUS
13018      && REG_P (XEXP (ind, 0))
13019      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13020      && CONST_INT_P (XEXP (ind, 1))
13021      && INTVAL (XEXP (ind, 1)) > -1024
13022      && INTVAL (XEXP (ind, 1)) <  1024
13023      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13024    return TRUE;
13025
13026  return FALSE;
13027}
13028
13029/* Return TRUE if OP is a memory operand which we can load or store a vector
13030   to/from. TYPE is one of the following values:
    0 - Vector load/store (vldr)
13032    1 - Core registers (ldm)
13033    2 - Element/structure loads (vld1)
13034 */
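/* For example, (mem:V2SI (post_inc:SI (reg:SI Rn))) is accepted for types 0
   and 2 but not for type 1, while (mem:V2SI (plus:SI (reg:SI Rn)
   (const_int 8))) is accepted only for type 0.  */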
13035int
13036neon_vector_mem_operand (rtx op, int type, bool strict)
13037{
13038  rtx ind;
13039
13040  /* Reject eliminable registers.  */
13041  if (! (reload_in_progress || reload_completed)
13042      && (   reg_mentioned_p (frame_pointer_rtx, op)
13043	  || reg_mentioned_p (arg_pointer_rtx, op)
13044	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13045	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13046	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13047	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13048    return !strict;
13049
13050  /* Constants are converted into offsets from labels.  */
13051  if (!MEM_P (op))
13052    return FALSE;
13053
13054  ind = XEXP (op, 0);
13055
13056  if (reload_completed
13057      && (GET_CODE (ind) == LABEL_REF
13058	  || (GET_CODE (ind) == CONST
13059	      && GET_CODE (XEXP (ind, 0)) == PLUS
13060	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13061	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13062    return TRUE;
13063
13064  /* Match: (mem (reg)).  */
13065  if (REG_P (ind))
13066    return arm_address_register_rtx_p (ind, 0);
13067
  /* Allow post-increment with Neon registers, and pre-decrement for
     vldr/vstr (type 0).  */
13069  if ((type != 1 && GET_CODE (ind) == POST_INC)
13070      || (type == 0 && GET_CODE (ind) == PRE_DEC))
13071    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13072
  /* Allow post-increment by register for VLDn.  */
13074  if (type == 2 && GET_CODE (ind) == POST_MODIFY
13075      && GET_CODE (XEXP (ind, 1)) == PLUS
13076      && REG_P (XEXP (XEXP (ind, 1), 1)))
13077     return true;
13078
13079  /* Match:
13080     (plus (reg)
13081          (const)).  */
13082  if (type == 0
13083      && GET_CODE (ind) == PLUS
13084      && REG_P (XEXP (ind, 0))
13085      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13086      && CONST_INT_P (XEXP (ind, 1))
13087      && INTVAL (XEXP (ind, 1)) > -1024
13088      /* For quad modes, we restrict the constant offset to be slightly less
13089	 than what the instruction format permits.  We have no such constraint
13090	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
13091      && (INTVAL (XEXP (ind, 1))
13092	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13093      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13094    return TRUE;
13095
13096  return FALSE;
13097}
13098
13099/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13100   type.  */
13101int
13102neon_struct_mem_operand (rtx op)
13103{
13104  rtx ind;
13105
13106  /* Reject eliminable registers.  */
13107  if (! (reload_in_progress || reload_completed)
13108      && (   reg_mentioned_p (frame_pointer_rtx, op)
13109	  || reg_mentioned_p (arg_pointer_rtx, op)
13110	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13111	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13112	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13113	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13114    return FALSE;
13115
13116  /* Constants are converted into offsets from labels.  */
13117  if (!MEM_P (op))
13118    return FALSE;
13119
13120  ind = XEXP (op, 0);
13121
13122  if (reload_completed
13123      && (GET_CODE (ind) == LABEL_REF
13124	  || (GET_CODE (ind) == CONST
13125	      && GET_CODE (XEXP (ind, 0)) == PLUS
13126	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13127	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13128    return TRUE;
13129
13130  /* Match: (mem (reg)).  */
13131  if (REG_P (ind))
13132    return arm_address_register_rtx_p (ind, 0);
13133
13134  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
13135  if (GET_CODE (ind) == POST_INC
13136      || GET_CODE (ind) == PRE_DEC)
13137    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13138
13139  return FALSE;
13140}
13141
13142/* Return true if X is a register that will be eliminated later on.  */
13143int
13144arm_eliminable_register (rtx x)
13145{
13146  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13147		       || REGNO (x) == ARG_POINTER_REGNUM
13148		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13149			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13150}
13151
/* Return GENERAL_REGS if a scratch register is required to reload X to/from
   coprocessor registers.  Otherwise return NO_REGS.  */
13154
13155enum reg_class
13156coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13157{
13158  if (mode == HFmode)
13159    {
13160      if (!TARGET_NEON_FP16)
13161	return GENERAL_REGS;
13162      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13163	return NO_REGS;
13164      return GENERAL_REGS;
13165    }
13166
13167  /* The neon move patterns handle all legitimate vector and struct
13168     addresses.  */
13169  if (TARGET_NEON
13170      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13171      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13172	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13173	  || VALID_NEON_STRUCT_MODE (mode)))
13174    return NO_REGS;
13175
13176  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13177    return NO_REGS;
13178
13179  return GENERAL_REGS;
13180}
13181
13182/* Values which must be returned in the most-significant end of the return
13183   register.  */
13184
13185static bool
13186arm_return_in_msb (const_tree valtype)
13187{
13188  return (TARGET_AAPCS_BASED
13189          && BYTES_BIG_ENDIAN
13190	  && (AGGREGATE_TYPE_P (valtype)
13191	      || TREE_CODE (valtype) == COMPLEX_TYPE
13192	      || FIXED_POINT_TYPE_P (valtype)));
13193}
13194
13195/* Return TRUE if X references a SYMBOL_REF.  */
13196int
13197symbol_mentioned_p (rtx x)
13198{
13199  const char * fmt;
13200  int i;
13201
13202  if (GET_CODE (x) == SYMBOL_REF)
13203    return 1;
13204
13205  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13206     are constant offsets, not symbols.  */
13207  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13208    return 0;
13209
13210  fmt = GET_RTX_FORMAT (GET_CODE (x));
13211
13212  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13213    {
13214      if (fmt[i] == 'E')
13215	{
13216	  int j;
13217
13218	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13219	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
13220	      return 1;
13221	}
13222      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13223	return 1;
13224    }
13225
13226  return 0;
13227}
13228
13229/* Return TRUE if X references a LABEL_REF.  */
13230int
13231label_mentioned_p (rtx x)
13232{
13233  const char * fmt;
13234  int i;
13235
13236  if (GET_CODE (x) == LABEL_REF)
13237    return 1;
13238
13239  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13240     instruction, but they are constant offsets, not symbols.  */
13241  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13242    return 0;
13243
13244  fmt = GET_RTX_FORMAT (GET_CODE (x));
13245  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13246    {
13247      if (fmt[i] == 'E')
13248	{
13249	  int j;
13250
13251	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13252	    if (label_mentioned_p (XVECEXP (x, i, j)))
13253	      return 1;
13254	}
13255      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13256	return 1;
13257    }
13258
13259  return 0;
13260}
13261
13262int
13263tls_mentioned_p (rtx x)
13264{
13265  switch (GET_CODE (x))
13266    {
13267    case CONST:
13268      return tls_mentioned_p (XEXP (x, 0));
13269
13270    case UNSPEC:
13271      if (XINT (x, 1) == UNSPEC_TLS)
13272	return 1;
13273
13274    default:
13275      return 0;
13276    }
13277}
13278
13279/* Must not copy any rtx that uses a pc-relative address.  */
13280
13281static bool
13282arm_cannot_copy_insn_p (rtx_insn *insn)
13283{
13284  /* The tls call insn cannot be copied, as it is paired with a data
13285     word.  */
13286  if (recog_memoized (insn) == CODE_FOR_tlscall)
13287    return true;
13288
13289  subrtx_iterator::array_type array;
13290  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13291    {
13292      const_rtx x = *iter;
13293      if (GET_CODE (x) == UNSPEC
13294	  && (XINT (x, 1) == UNSPEC_PIC_BASE
13295	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13296	return true;
13297    }
13298  return false;
13299}
13300
13301enum rtx_code
13302minmax_code (rtx x)
13303{
13304  enum rtx_code code = GET_CODE (x);
13305
13306  switch (code)
13307    {
13308    case SMAX:
13309      return GE;
13310    case SMIN:
13311      return LE;
13312    case UMIN:
13313      return LEU;
13314    case UMAX:
13315      return GEU;
13316    default:
13317      gcc_unreachable ();
13318    }
13319}
13320
13321/* Match pair of min/max operators that can be implemented via usat/ssat.  */
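/* For example, the bounds [0, 255] match an unsigned saturation with
   *MASK == 8 (usat #8), and the bounds [-128, 127] match a signed
   saturation, also with *MASK == 8 (ssat #8).  */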
13322
13323bool
13324arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13325			int *mask, bool *signed_sat)
13326{
13327  /* The high bound must be a power of two minus one.  */
13328  int log = exact_log2 (INTVAL (hi_bound) + 1);
13329  if (log == -1)
13330    return false;
13331
13332  /* The low bound is either zero (for usat) or one less than the
13333     negation of the high bound (for ssat).  */
13334  if (INTVAL (lo_bound) == 0)
13335    {
13336      if (mask)
13337        *mask = log;
13338      if (signed_sat)
13339        *signed_sat = false;
13340
13341      return true;
13342    }
13343
13344  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13345    {
13346      if (mask)
13347        *mask = log + 1;
13348      if (signed_sat)
13349        *signed_sat = true;
13350
13351      return true;
13352    }
13353
13354  return false;
13355}
13356
13357/* Return 1 if memory locations are adjacent.  */
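/* Roughly, two references count as adjacent when they share a base register
   and their constant offsets differ by 4, e.g. (mem (reg r4)) and
   (mem (plus (reg r4) (const_int 4))); further restrictions apply below.  */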
13358int
13359adjacent_mem_locations (rtx a, rtx b)
13360{
13361  /* We don't guarantee to preserve the order of these memory refs.  */
13362  if (volatile_refs_p (a) || volatile_refs_p (b))
13363    return 0;
13364
13365  if ((REG_P (XEXP (a, 0))
13366       || (GET_CODE (XEXP (a, 0)) == PLUS
13367	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13368      && (REG_P (XEXP (b, 0))
13369	  || (GET_CODE (XEXP (b, 0)) == PLUS
13370	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13371    {
13372      HOST_WIDE_INT val0 = 0, val1 = 0;
13373      rtx reg0, reg1;
13374      int val_diff;
13375
13376      if (GET_CODE (XEXP (a, 0)) == PLUS)
13377        {
13378	  reg0 = XEXP (XEXP (a, 0), 0);
13379	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13380        }
13381      else
13382	reg0 = XEXP (a, 0);
13383
13384      if (GET_CODE (XEXP (b, 0)) == PLUS)
13385        {
13386	  reg1 = XEXP (XEXP (b, 0), 0);
13387	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13388        }
13389      else
13390	reg1 = XEXP (b, 0);
13391
13392      /* Don't accept any offset that will require multiple
13393	 instructions to handle, since this would cause the
13394	 arith_adjacentmem pattern to output an overlong sequence.  */
13395      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13396	return 0;
13397
13398      /* Don't allow an eliminable register: register elimination can make
13399	 the offset too large.  */
13400      if (arm_eliminable_register (reg0))
13401	return 0;
13402
13403      val_diff = val1 - val0;
13404
13405      if (arm_ld_sched)
13406	{
13407	  /* If the target has load delay slots, then there's no benefit
13408	     to using an ldm instruction unless the offset is zero and
13409	     we are optimizing for size.  */
13410	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13411		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13412		  && (val_diff == 4 || val_diff == -4));
13413	}
13414
13415      return ((REGNO (reg0) == REGNO (reg1))
13416	      && (val_diff == 4 || val_diff == -4));
13417    }
13418
13419  return 0;
13420}
13421
13422/* Return true if OP is a valid load or store multiple operation.  LOAD is true
13423   for load operations, false for store operations.  CONSECUTIVE is true
13424   if the register numbers in the operation must be consecutive in the register
13425   bank. RETURN_PC is true if value is to be loaded in PC.
13426   The pattern we are trying to match for load is:
13427     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13428      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13429       :
13430       :
13431      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13432     ]
13433     where
13434     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13435     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13436     3.  If consecutive is TRUE, then for kth register being loaded,
13437         REGNO (R_dk) = REGNO (R_d0) + k.
13438   The pattern for store is similar.  */
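/* For example, an SImode load-multiple of r4 and r5 from the address in r0
   would match as:
     [(set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]  */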
13439bool
13440ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13441                     bool consecutive, bool return_pc)
13442{
13443  HOST_WIDE_INT count = XVECLEN (op, 0);
13444  rtx reg, mem, addr;
13445  unsigned regno;
13446  unsigned first_regno;
13447  HOST_WIDE_INT i = 1, base = 0, offset = 0;
13448  rtx elt;
13449  bool addr_reg_in_reglist = false;
13450  bool update = false;
13451  int reg_increment;
13452  int offset_adj;
13453  int regs_per_val;
13454
13455  /* If not in SImode, then registers must be consecutive
13456     (e.g., VLDM instructions for DFmode).  */
13457  gcc_assert ((mode == SImode) || consecutive);
13458  /* Setting return_pc for stores is illegal.  */
13459  gcc_assert (!return_pc || load);
13460
13461  /* Set up the increments and the regs per val based on the mode.  */
13462  reg_increment = GET_MODE_SIZE (mode);
13463  regs_per_val = reg_increment / 4;
13464  offset_adj = return_pc ? 1 : 0;
13465
13466  if (count <= 1
13467      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13468      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13469    return false;
13470
13471  /* Check if this is a write-back.  */
13472  elt = XVECEXP (op, 0, offset_adj);
13473  if (GET_CODE (SET_SRC (elt)) == PLUS)
13474    {
13475      i++;
13476      base = 1;
13477      update = true;
13478
13479      /* The offset adjustment must be the number of registers being
13480         popped times the size of a single register.  */
13481      if (!REG_P (SET_DEST (elt))
13482          || !REG_P (XEXP (SET_SRC (elt), 0))
13483          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13484          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13485          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13486             ((count - 1 - offset_adj) * reg_increment))
13487        return false;
13488    }
13489
13490  i = i + offset_adj;
13491  base = base + offset_adj;
13492  /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13493     success depends on the type: VLDM can do just one reg,
13494     LDM must do at least two.  */
13495  if ((count <= i) && (mode == SImode))
13496      return false;
13497
13498  elt = XVECEXP (op, 0, i - 1);
13499  if (GET_CODE (elt) != SET)
13500    return false;
13501
13502  if (load)
13503    {
13504      reg = SET_DEST (elt);
13505      mem = SET_SRC (elt);
13506    }
13507  else
13508    {
13509      reg = SET_SRC (elt);
13510      mem = SET_DEST (elt);
13511    }
13512
13513  if (!REG_P (reg) || !MEM_P (mem))
13514    return false;
13515
13516  regno = REGNO (reg);
13517  first_regno = regno;
13518  addr = XEXP (mem, 0);
13519  if (GET_CODE (addr) == PLUS)
13520    {
13521      if (!CONST_INT_P (XEXP (addr, 1)))
13522	return false;
13523
13524      offset = INTVAL (XEXP (addr, 1));
13525      addr = XEXP (addr, 0);
13526    }
13527
13528  if (!REG_P (addr))
13529    return false;
13530
13531  /* Don't allow SP to be loaded unless it is also the base register. It
13532     guarantees that SP is reset correctly when an LDM instruction
13533     is interrupted. Otherwise, we might end up with a corrupt stack.  */
13534  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13535    return false;
13536
13537  for (; i < count; i++)
13538    {
13539      elt = XVECEXP (op, 0, i);
13540      if (GET_CODE (elt) != SET)
13541        return false;
13542
13543      if (load)
13544        {
13545          reg = SET_DEST (elt);
13546          mem = SET_SRC (elt);
13547        }
13548      else
13549        {
13550          reg = SET_SRC (elt);
13551          mem = SET_DEST (elt);
13552        }
13553
13554      if (!REG_P (reg)
13555          || GET_MODE (reg) != mode
13556          || REGNO (reg) <= regno
13557          || (consecutive
13558              && (REGNO (reg) !=
13559                  (unsigned int) (first_regno + regs_per_val * (i - base))))
13560          /* Don't allow SP to be loaded unless it is also the base register. It
13561             guarantees that SP is reset correctly when an LDM instruction
13562             is interrupted. Otherwise, we might end up with a corrupt stack.  */
13563          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13564          || !MEM_P (mem)
13565          || GET_MODE (mem) != mode
13566          || ((GET_CODE (XEXP (mem, 0)) != PLUS
13567	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13568	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13569	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13570                   offset + (i - base) * reg_increment))
13571	      && (!REG_P (XEXP (mem, 0))
13572		  || offset + (i - base) * reg_increment != 0)))
13573        return false;
13574
13575      regno = REGNO (reg);
13576      if (regno == REGNO (addr))
13577        addr_reg_in_reglist = true;
13578    }
13579
13580  if (load)
13581    {
13582      if (update && addr_reg_in_reglist)
13583        return false;
13584
13585      /* For Thumb-1, address register is always modified - either by write-back
13586         or by explicit load.  If the pattern does not describe an update,
13587         then the address register must be in the list of loaded registers.  */
13588      if (TARGET_THUMB1)
13589        return update || addr_reg_in_reglist;
13590    }
13591
13592  return true;
13593}
13594
13595/* Return true iff it would be profitable to turn a sequence of NOPS loads
13596   or stores (depending on IS_STORE) into a load-multiple or store-multiple
13597   instruction.  ADD_OFFSET is nonzero if the base address register needs
13598   to be modified with an add instruction before we can use it.  */
13599
13600static bool
13601multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13602				 int nops, HOST_WIDE_INT add_offset)
{
13604  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13605     if the offset isn't small enough.  The reason 2 ldrs are faster
     is that these ARMs are able to do more than one cache access
13607     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
13608     whilst the ARM8 has a double bandwidth cache.  This means that
13609     these cores can do both an instruction fetch and a data fetch in
13610     a single cycle, so the trick of calculating the address into a
13611     scratch register (one of the result regs) and then doing a load
13612     multiple actually becomes slower (and no smaller in code size).
13613     That is the transformation
13614
13615 	ldr	rd1, [rbase + offset]
13616 	ldr	rd2, [rbase + offset + 4]
13617
13618     to
13619
13620 	add	rd1, rbase, offset
13621 	ldmia	rd1, {rd1, rd2}
13622
13623     produces worse code -- '3 cycles + any stalls on rd2' instead of
13624     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
13625     access per cycle, the first sequence could never complete in less
13626     than 6 cycles, whereas the ldm sequence would only take 5 and
13627     would make better use of sequential accesses if not hitting the
13628     cache.
13629
13630     We cheat here and test 'arm_ld_sched' which we currently know to
13631     only be true for the ARM8, ARM9 and StrongARM.  If this ever
13632     changes, then the test below needs to be reworked.  */
13633  if (nops == 2 && arm_ld_sched && add_offset != 0)
13634    return false;
13635
13636  /* XScale has load-store double instructions, but they have stricter
13637     alignment requirements than load-store multiple, so we cannot
13638     use them.
13639
13640     For XScale ldm requires 2 + NREGS cycles to complete and blocks
13641     the pipeline until completion.
13642
13643	NREGS		CYCLES
13644	  1		  3
13645	  2		  4
13646	  3		  5
13647	  4		  6
13648
13649     An ldr instruction takes 1-3 cycles, but does not block the
13650     pipeline.
13651
13652	NREGS		CYCLES
13653	  1		 1-3
13654	  2		 2-6
13655	  3		 3-9
13656	  4		 4-12
13657
13658     Best case ldr will always win.  However, the more ldr instructions
13659     we issue, the less likely we are to be able to schedule them well.
13660     Using ldr instructions also increases code size.
13661
13662     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13663     for counts of 3 or 4 regs.  */
13664  if (nops <= 2 && arm_tune_xscale && !optimize_size)
13665    return false;
13666  return true;
13667}
13668
13669/* Subroutine of load_multiple_sequence and store_multiple_sequence.
13670   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13671   an array ORDER which describes the sequence to use when accessing the
13672   offsets that produces an ascending order.  In this sequence, each
13673   offset must be larger by exactly 4 than the previous one.  ORDER[0]
13674   must have been filled in with the lowest offset by the caller.
13675   If UNSORTED_REGS is nonnull, it is an array of register numbers that
13676   we use to verify that ORDER produces an ascending order of registers.
13677   Return true if it was possible to construct such an order, false if
13678   not.  */
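/* Worked example (illustrative): with NOPS = 4,
   UNSORTED_OFFSETS = {8, 4, 0, 12} and ORDER[0] = 2 (the index of the
   lowest offset, 0), the loop below fills ORDER = {2, 1, 0, 3}, i.e. the
   offsets 0, 4, 8, 12 in ascending order.  If no offset (or more than
   one) is exactly 4 above the previous one, the function returns
   false.  */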
13679
13680static bool
13681compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13682		      int *unsorted_regs)
13683{
13684  int i;
13685  for (i = 1; i < nops; i++)
13686    {
13687      int j;
13688
13689      order[i] = order[i - 1];
13690      for (j = 0; j < nops; j++)
13691	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13692	  {
13693	    /* We must find exactly one offset that is higher than the
13694	       previous one by 4.  */
13695	    if (order[i] != order[i - 1])
13696	      return false;
13697	    order[i] = j;
13698	  }
13699      if (order[i] == order[i - 1])
13700	return false;
13701      /* The register numbers must be ascending.  */
13702      if (unsorted_regs != NULL
13703	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13704	return false;
13705    }
13706  return true;
13707}
13708
13709/* Used to determine in a peephole whether a sequence of load
13710   instructions can be changed into a load-multiple instruction.
13711   NOPS is the number of separate load instructions we are examining.  The
13712   first NOPS entries in OPERANDS are the destination registers, the
13713   next NOPS entries are memory operands.  If this function is
13714   successful, *BASE is set to the common base register of the memory
13715   accesses; *LOAD_OFFSET is set to the first memory location's offset
13716   from that base register.
13717   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13720   the sequence of registers in REGS matches the loads from ascending memory
13721   locations, and the function verifies that the register numbers are
13722   themselves ascending.  If CHECK_REGS is false, the register numbers
13723   are stored in the order they are found in the operands.  */
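/* For example (illustrative, assuming an ARM-state target on which a
   two-register LDM is profitable): for the loads
     ldr r2, [r5, #4]
     ldr r3, [r5, #8]
   this returns 2 (the ldmib case) with *BASE = 5, *LOAD_OFFSET = 4 and
   REGS = {2, 3}.  A return value of 0 means the sequence cannot be
   combined into a load-multiple.  */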
13724static int
13725load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13726			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13727{
13728  int unsorted_regs[MAX_LDM_STM_OPS];
13729  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13730  int order[MAX_LDM_STM_OPS];
13731  rtx base_reg_rtx = NULL;
13732  int base_reg = -1;
13733  int i, ldm_case;
13734
13735  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13736     easily extended if required.  */
13737  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13738
13739  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13740
13741  /* Loop over the operands and check that the memory references are
13742     suitable (i.e. immediate offsets from the same base register).  At
13743     the same time, extract the target register, and the memory
13744     offsets.  */
13745  for (i = 0; i < nops; i++)
13746    {
13747      rtx reg;
13748      rtx offset;
13749
13750      /* Convert a subreg of a mem into the mem itself.  */
13751      if (GET_CODE (operands[nops + i]) == SUBREG)
13752	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13753
13754      gcc_assert (MEM_P (operands[nops + i]));
13755
13756      /* Don't reorder volatile memory references; it doesn't seem worth
13757	 looking for the case where the order is ok anyway.  */
13758      if (MEM_VOLATILE_P (operands[nops + i]))
13759	return 0;
13760
13761      offset = const0_rtx;
13762
13763      if ((REG_P (reg = XEXP (operands[nops + i], 0))
13764	   || (GET_CODE (reg) == SUBREG
13765	       && REG_P (reg = SUBREG_REG (reg))))
13766	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13767	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13768		  || (GET_CODE (reg) == SUBREG
13769		      && REG_P (reg = SUBREG_REG (reg))))
13770	      && (CONST_INT_P (offset
13771		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13772	{
13773	  if (i == 0)
13774	    {
13775	      base_reg = REGNO (reg);
13776	      base_reg_rtx = reg;
13777	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13778		return 0;
13779	    }
13780	  else if (base_reg != (int) REGNO (reg))
13781	    /* Not addressed from the same base register.  */
13782	    return 0;
13783
13784	  unsorted_regs[i] = (REG_P (operands[i])
13785			      ? REGNO (operands[i])
13786			      : REGNO (SUBREG_REG (operands[i])));
13787
13788	  /* If it isn't an integer register, or if it overwrites the
13789	     base register but isn't the last insn in the list, then
13790	     we can't do this.  */
13791	  if (unsorted_regs[i] < 0
13792	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13793	      || unsorted_regs[i] > 14
13794	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
13795	    return 0;
13796
13797          /* Don't allow SP to be loaded unless it is also the base
13798             register.  It guarantees that SP is reset correctly when
13799             an LDM instruction is interrupted.  Otherwise, we might
13800             end up with a corrupt stack.  */
13801          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13802            return 0;
13803
13804	  unsorted_offsets[i] = INTVAL (offset);
13805	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13806	    order[0] = i;
13807	}
13808      else
13809	/* Not a suitable memory address.  */
13810	return 0;
13811    }
13812
13813  /* All the useful information has now been extracted from the
13814     operands into unsorted_regs and unsorted_offsets; additionally,
13815     order[0] has been set to the lowest offset in the list.  Sort
13816     the offsets into order, verifying that they are adjacent, and
13817     check that the register numbers are ascending.  */
13818  if (!compute_offset_order (nops, unsorted_offsets, order,
13819			     check_regs ? unsorted_regs : NULL))
13820    return 0;
13821
13822  if (saved_order)
13823    memcpy (saved_order, order, sizeof order);
13824
13825  if (base)
13826    {
13827      *base = base_reg;
13828
13829      for (i = 0; i < nops; i++)
13830	regs[i] = unsorted_regs[check_regs ? order[i] : i];
13831
13832      *load_offset = unsorted_offsets[order[0]];
13833    }
13834
13835  if (TARGET_THUMB1
13836      && !peep2_reg_dead_p (nops, base_reg_rtx))
13837    return 0;
13838
13839  if (unsorted_offsets[order[0]] == 0)
13840    ldm_case = 1; /* ldmia */
13841  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13842    ldm_case = 2; /* ldmib */
13843  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13844    ldm_case = 3; /* ldmda */
13845  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13846    ldm_case = 4; /* ldmdb */
13847  else if (const_ok_for_arm (unsorted_offsets[order[0]])
13848	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
13849    ldm_case = 5;
13850  else
13851    return 0;
13852
13853  if (!multiple_operation_profitable_p (false, nops,
13854					ldm_case == 5
13855					? unsorted_offsets[order[0]] : 0))
13856    return 0;
13857
13858  return ldm_case;
13859}
13860
13861/* Used to determine in a peephole whether a sequence of store instructions can
13862   be changed into a store-multiple instruction.
13863   NOPS is the number of separate store instructions we are examining.
13864   NOPS_TOTAL is the total number of instructions recognized by the peephole
13865   pattern.
13866   The first NOPS entries in OPERANDS are the source registers, the next
13867   NOPS entries are memory operands.  If this function is successful, *BASE is
13868   set to the common base register of the memory accesses; *LOAD_OFFSET is set
13869   to the first memory location's offset from that base register.  REGS is an
13870   array filled in with the source register numbers, REG_RTXS (if nonnull) is
13871   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
13874   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13875   from ascending memory locations, and the function verifies that the register
13876   numbers are themselves ascending.  If CHECK_REGS is false, the register
13877   numbers are stored in the order they are found in the operands.  */
13878static int
13879store_multiple_sequence (rtx *operands, int nops, int nops_total,
13880			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13881			 HOST_WIDE_INT *load_offset, bool check_regs)
13882{
13883  int unsorted_regs[MAX_LDM_STM_OPS];
13884  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13885  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13886  int order[MAX_LDM_STM_OPS];
13887  int base_reg = -1;
13888  rtx base_reg_rtx = NULL;
13889  int i, stm_case;
13890
13891  /* Write back of base register is currently only supported for Thumb 1.  */
13892  int base_writeback = TARGET_THUMB1;
13893
13894  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13895     easily extended if required.  */
13896  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13897
13898  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13899
13900  /* Loop over the operands and check that the memory references are
13901     suitable (i.e. immediate offsets from the same base register).  At
13902     the same time, extract the target register, and the memory
13903     offsets.  */
13904  for (i = 0; i < nops; i++)
13905    {
13906      rtx reg;
13907      rtx offset;
13908
13909      /* Convert a subreg of a mem into the mem itself.  */
13910      if (GET_CODE (operands[nops + i]) == SUBREG)
13911	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13912
13913      gcc_assert (MEM_P (operands[nops + i]));
13914
13915      /* Don't reorder volatile memory references; it doesn't seem worth
13916	 looking for the case where the order is ok anyway.  */
13917      if (MEM_VOLATILE_P (operands[nops + i]))
13918	return 0;
13919
13920      offset = const0_rtx;
13921
13922      if ((REG_P (reg = XEXP (operands[nops + i], 0))
13923	   || (GET_CODE (reg) == SUBREG
13924	       && REG_P (reg = SUBREG_REG (reg))))
13925	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13926	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13927		  || (GET_CODE (reg) == SUBREG
13928		      && REG_P (reg = SUBREG_REG (reg))))
13929	      && (CONST_INT_P (offset
13930		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13931	{
13932	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
13933				  ? operands[i] : SUBREG_REG (operands[i]));
13934	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13935
13936	  if (i == 0)
13937	    {
13938	      base_reg = REGNO (reg);
13939	      base_reg_rtx = reg;
13940	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13941		return 0;
13942	    }
13943	  else if (base_reg != (int) REGNO (reg))
13944	    /* Not addressed from the same base register.  */
13945	    return 0;
13946
13947	  /* If it isn't an integer register, then we can't do this.  */
13948	  if (unsorted_regs[i] < 0
13949	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13950	      /* The effects are unpredictable if the base register is
13951		 both updated and stored.  */
13952	      || (base_writeback && unsorted_regs[i] == base_reg)
13953	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13954	      || unsorted_regs[i] > 14)
13955	    return 0;
13956
13957	  unsorted_offsets[i] = INTVAL (offset);
13958	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13959	    order[0] = i;
13960	}
13961      else
13962	/* Not a suitable memory address.  */
13963	return 0;
13964    }
13965
13966  /* All the useful information has now been extracted from the
13967     operands into unsorted_regs and unsorted_offsets; additionally,
13968     order[0] has been set to the lowest offset in the list.  Sort
13969     the offsets into order, verifying that they are adjacent, and
13970     check that the register numbers are ascending.  */
13971  if (!compute_offset_order (nops, unsorted_offsets, order,
13972			     check_regs ? unsorted_regs : NULL))
13973    return 0;
13974
13975  if (saved_order)
13976    memcpy (saved_order, order, sizeof order);
13977
13978  if (base)
13979    {
13980      *base = base_reg;
13981
13982      for (i = 0; i < nops; i++)
13983	{
13984	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
13985	  if (reg_rtxs)
13986	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13987	}
13988
13989      *load_offset = unsorted_offsets[order[0]];
13990    }
13991
13992  if (TARGET_THUMB1
13993      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13994    return 0;
13995
13996  if (unsorted_offsets[order[0]] == 0)
13997    stm_case = 1; /* stmia */
13998  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13999    stm_case = 2; /* stmib */
14000  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14001    stm_case = 3; /* stmda */
14002  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14003    stm_case = 4; /* stmdb */
14004  else
14005    return 0;
14006
14007  if (!multiple_operation_profitable_p (false, nops, 0))
14008    return 0;
14009
14010  return stm_case;
14011}
14012
14013/* Routines for use in generating RTL.  */
14014
14015/* Generate a load-multiple instruction.  COUNT is the number of loads in
14016   the instruction; REGS and MEMS are arrays containing the operands.
14017   BASEREG is the base register to be used in addressing the memory operands.
14018   WBACK_OFFSET is nonzero if the instruction should update the base
14019   register.  */
14020
14021static rtx
14022arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14023			 HOST_WIDE_INT wback_offset)
14024{
14025  int i = 0, j;
14026  rtx result;
14027
14028  if (!multiple_operation_profitable_p (false, count, 0))
14029    {
14030      rtx seq;
14031
14032      start_sequence ();
14033
14034      for (i = 0; i < count; i++)
14035	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14036
14037      if (wback_offset != 0)
14038	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14039
14040      seq = get_insns ();
14041      end_sequence ();
14042
14043      return seq;
14044    }
14045
14046  result = gen_rtx_PARALLEL (VOIDmode,
14047			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14048  if (wback_offset != 0)
14049    {
14050      XVECEXP (result, 0, 0)
14051	= gen_rtx_SET (VOIDmode, basereg,
14052		       plus_constant (Pmode, basereg, wback_offset));
14053      i = 1;
14054      count++;
14055    }
14056
14057  for (j = 0; i < count; i++, j++)
14058    XVECEXP (result, 0, i)
14059      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
14060
14061  return result;
14062}
14063
14064/* Generate a store-multiple instruction.  COUNT is the number of stores in
14065   the instruction; REGS and MEMS are arrays containing the operands.
14066   BASEREG is the base register to be used in addressing the memory operands.
14067   WBACK_OFFSET is nonzero if the instruction should update the base
14068   register.  */
14069
14070static rtx
14071arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14072			  HOST_WIDE_INT wback_offset)
14073{
14074  int i = 0, j;
14075  rtx result;
14076
14077  if (GET_CODE (basereg) == PLUS)
14078    basereg = XEXP (basereg, 0);
14079
14080  if (!multiple_operation_profitable_p (false, count, 0))
14081    {
14082      rtx seq;
14083
14084      start_sequence ();
14085
14086      for (i = 0; i < count; i++)
14087	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14088
14089      if (wback_offset != 0)
14090	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14091
14092      seq = get_insns ();
14093      end_sequence ();
14094
14095      return seq;
14096    }
14097
14098  result = gen_rtx_PARALLEL (VOIDmode,
14099			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14100  if (wback_offset != 0)
14101    {
14102      XVECEXP (result, 0, 0)
14103	= gen_rtx_SET (VOIDmode, basereg,
14104		       plus_constant (Pmode, basereg, wback_offset));
14105      i = 1;
14106      count++;
14107    }
14108
14109  for (j = 0; i < count; i++, j++)
14110    XVECEXP (result, 0, i)
14111      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14112
14113  return result;
14114}
14115
14116/* Generate either a load-multiple or a store-multiple instruction.  This
14117   function can be used in situations where we can start with a single MEM
14118   rtx and adjust its address upwards.
14119   COUNT is the number of operations in the instruction, not counting a
14120   possible update of the base register.  REGS is an array containing the
14121   register operands.
14122   BASEREG is the base register to be used in addressing the memory operands,
14123   which are constructed from BASEMEM.
14124   WRITE_BACK specifies whether the generated instruction should include an
14125   update of the base register.
14126   OFFSETP is used to pass an offset to and from this function; this offset
14127   is not used when constructing the address (instead BASEMEM should have an
14128   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
14130
14131static rtx
14132arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14133		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14134{
14135  rtx mems[MAX_LDM_STM_OPS];
14136  HOST_WIDE_INT offset = *offsetp;
14137  int i;
14138
14139  gcc_assert (count <= MAX_LDM_STM_OPS);
14140
14141  if (GET_CODE (basereg) == PLUS)
14142    basereg = XEXP (basereg, 0);
14143
14144  for (i = 0; i < count; i++)
14145    {
14146      rtx addr = plus_constant (Pmode, basereg, i * 4);
14147      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14148      offset += 4;
14149    }
14150
14151  if (write_back)
14152    *offsetp = offset;
14153
14154  if (is_load)
14155    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14156				    write_back ? 4 * count : 0);
14157  else
14158    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14159				     write_back ? 4 * count : 0);
14160}
14161
14162rtx
14163arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14164		       rtx basemem, HOST_WIDE_INT *offsetp)
14165{
14166  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14167			      offsetp);
14168}
14169
14170rtx
14171arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14172			rtx basemem, HOST_WIDE_INT *offsetp)
14173{
14174  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14175			      offsetp);
14176}
14177
14178/* Called from a peephole2 expander to turn a sequence of loads into an
14179   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
14180   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
14181   is true if we can reorder the registers because they are used commutatively
14182   subsequently.
14183   Returns true iff we could generate a new instruction.  */
14184
14185bool
14186gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14187{
14188  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14189  rtx mems[MAX_LDM_STM_OPS];
14190  int i, j, base_reg;
14191  rtx base_reg_rtx;
14192  HOST_WIDE_INT offset;
14193  int write_back = FALSE;
14194  int ldm_case;
14195  rtx addr;
14196
14197  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14198				     &base_reg, &offset, !sort_regs);
14199
14200  if (ldm_case == 0)
14201    return false;
14202
14203  if (sort_regs)
14204    for (i = 0; i < nops - 1; i++)
14205      for (j = i + 1; j < nops; j++)
14206	if (regs[i] > regs[j])
14207	  {
14208	    int t = regs[i];
14209	    regs[i] = regs[j];
14210	    regs[j] = t;
14211	  }
14212  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14213
14214  if (TARGET_THUMB1)
14215    {
14216      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14217      gcc_assert (ldm_case == 1 || ldm_case == 5);
14218      write_back = TRUE;
14219    }
14220
14221  if (ldm_case == 5)
14222    {
14223      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14224      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14225      offset = 0;
14226      if (!TARGET_THUMB1)
14227	{
14228	  base_reg = regs[0];
14229	  base_reg_rtx = newbase;
14230	}
14231    }
14232
14233  for (i = 0; i < nops; i++)
14234    {
14235      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14236      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14237					      SImode, addr, 0);
14238    }
14239  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14240				      write_back ? offset + i * 4 : 0));
14241  return true;
14242}
14243
14244/* Called from a peephole2 expander to turn a sequence of stores into an
14245   STM instruction.  OPERANDS are the operands found by the peephole matcher;
14246   NOPS indicates how many separate stores we are trying to combine.
14247   Returns true iff we could generate a new instruction.  */
14248
14249bool
14250gen_stm_seq (rtx *operands, int nops)
14251{
14252  int i;
14253  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14254  rtx mems[MAX_LDM_STM_OPS];
14255  int base_reg;
14256  rtx base_reg_rtx;
14257  HOST_WIDE_INT offset;
14258  int write_back = FALSE;
14259  int stm_case;
14260  rtx addr;
14261  bool base_reg_dies;
14262
14263  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14264				      mem_order, &base_reg, &offset, true);
14265
14266  if (stm_case == 0)
14267    return false;
14268
14269  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14270
14271  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14272  if (TARGET_THUMB1)
14273    {
14274      gcc_assert (base_reg_dies);
14275      write_back = TRUE;
14276    }
14277
14278  if (stm_case == 5)
14279    {
14280      gcc_assert (base_reg_dies);
14281      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14282      offset = 0;
14283    }
14284
14285  addr = plus_constant (Pmode, base_reg_rtx, offset);
14286
14287  for (i = 0; i < nops; i++)
14288    {
14289      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14290      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14291					      SImode, addr, 0);
14292    }
14293  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14294				       write_back ? offset + i * 4 : 0));
14295  return true;
14296}
14297
14298/* Called from a peephole2 expander to turn a sequence of stores that are
14299   preceded by constant loads into an STM instruction.  OPERANDS are the
14300   operands found by the peephole matcher; NOPS indicates how many
14301   separate stores we are trying to combine; there are 2 * NOPS
14302   instructions in the peephole.
14303   Returns true iff we could generate a new instruction.  */
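/* Illustrative example (hypothetical register numbers): if both stores in

     mov r3, #1   ...   str r3, [r0]
     mov r3, #2   ...   str r3, [r0, #4]

   reuse r3, one of the uses is renamed to a free register found by
   peep2_find_free_register, each constant is loaded into the register
   that stores to the corresponding (ascending) address, and a single
   stmia covers both stores.  */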
14304
14305bool
14306gen_const_stm_seq (rtx *operands, int nops)
14307{
14308  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14309  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14310  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14311  rtx mems[MAX_LDM_STM_OPS];
14312  int base_reg;
14313  rtx base_reg_rtx;
14314  HOST_WIDE_INT offset;
14315  int write_back = FALSE;
14316  int stm_case;
14317  rtx addr;
14318  bool base_reg_dies;
14319  int i, j;
14320  HARD_REG_SET allocated;
14321
14322  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14323				      mem_order, &base_reg, &offset, false);
14324
14325  if (stm_case == 0)
14326    return false;
14327
14328  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14329
14330  /* If the same register is used more than once, try to find a free
14331     register.  */
14332  CLEAR_HARD_REG_SET (allocated);
14333  for (i = 0; i < nops; i++)
14334    {
14335      for (j = i + 1; j < nops; j++)
14336	if (regs[i] == regs[j])
14337	  {
14338	    rtx t = peep2_find_free_register (0, nops * 2,
14339					      TARGET_THUMB1 ? "l" : "r",
14340					      SImode, &allocated);
14341	    if (t == NULL_RTX)
14342	      return false;
14343	    reg_rtxs[i] = t;
14344	    regs[i] = REGNO (t);
14345	  }
14346    }
14347
14348  /* Compute an ordering that maps the register numbers to an ascending
14349     sequence.  */
14350  reg_order[0] = 0;
14351  for (i = 0; i < nops; i++)
14352    if (regs[i] < regs[reg_order[0]])
14353      reg_order[0] = i;
14354
14355  for (i = 1; i < nops; i++)
14356    {
14357      int this_order = reg_order[i - 1];
14358      for (j = 0; j < nops; j++)
14359	if (regs[j] > regs[reg_order[i - 1]]
14360	    && (this_order == reg_order[i - 1]
14361		|| regs[j] < regs[this_order]))
14362	  this_order = j;
14363      reg_order[i] = this_order;
14364    }
14365
14366  /* Ensure that registers that must be live after the instruction end
14367     up with the correct value.  */
14368  for (i = 0; i < nops; i++)
14369    {
14370      int this_order = reg_order[i];
14371      if ((this_order != mem_order[i]
14372	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14373	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14374	return false;
14375    }
14376
14377  /* Load the constants.  */
14378  for (i = 0; i < nops; i++)
14379    {
14380      rtx op = operands[2 * nops + mem_order[i]];
14381      sorted_regs[i] = regs[reg_order[i]];
14382      emit_move_insn (reg_rtxs[reg_order[i]], op);
14383    }
14384
14385  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14386
14387  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14388  if (TARGET_THUMB1)
14389    {
14390      gcc_assert (base_reg_dies);
14391      write_back = TRUE;
14392    }
14393
14394  if (stm_case == 5)
14395    {
14396      gcc_assert (base_reg_dies);
14397      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14398      offset = 0;
14399    }
14400
14401  addr = plus_constant (Pmode, base_reg_rtx, offset);
14402
14403  for (i = 0; i < nops; i++)
14404    {
14405      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14406      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14407					      SImode, addr, 0);
14408    }
14409  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14410				       write_back ? offset + i * 4 : 0));
14411  return true;
14412}
14413
14414/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14415   unaligned copies on processors which support unaligned semantics for those
14416   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
14417   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14418   An interleave factor of 1 (the minimum) will perform no interleaving.
14419   Load/store multiple are used for aligned addresses where possible.  */
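/* Illustrative sketch (not from the original comments; rA and rB stand
   for whichever registers are allocated): with INTERLEAVE_FACTOR == 2 and
   neither side word-aligned, each 8-byte block is copied as

     ldr  rA, [src]
     ldr  rB, [src, #4]
     str  rA, [dst]
     str  rB, [dst, #4]

   so the second load can issue before the first result is needed; when
   the source or destination is word-aligned, the pairs collapse to
   ldm/stm with write-back instead.  */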
14420
14421static void
14422arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14423				   HOST_WIDE_INT length,
14424				   unsigned int interleave_factor)
14425{
14426  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14427  int *regnos = XALLOCAVEC (int, interleave_factor);
14428  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14429  HOST_WIDE_INT i, j;
14430  HOST_WIDE_INT remaining = length, words;
14431  rtx halfword_tmp = NULL, byte_tmp = NULL;
14432  rtx dst, src;
14433  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14434  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14435  HOST_WIDE_INT srcoffset, dstoffset;
14436  HOST_WIDE_INT src_autoinc, dst_autoinc;
14437  rtx mem, addr;
14438
14439  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14440
14441  /* Use hard registers if we have aligned source or destination so we can use
14442     load/store multiple with contiguous registers.  */
14443  if (dst_aligned || src_aligned)
14444    for (i = 0; i < interleave_factor; i++)
14445      regs[i] = gen_rtx_REG (SImode, i);
14446  else
14447    for (i = 0; i < interleave_factor; i++)
14448      regs[i] = gen_reg_rtx (SImode);
14449
14450  dst = copy_addr_to_reg (XEXP (dstbase, 0));
14451  src = copy_addr_to_reg (XEXP (srcbase, 0));
14452
14453  srcoffset = dstoffset = 0;
14454
14455  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14456     For copying the last bytes we want to subtract this offset again.  */
14457  src_autoinc = dst_autoinc = 0;
14458
14459  for (i = 0; i < interleave_factor; i++)
14460    regnos[i] = i;
14461
14462  /* Copy BLOCK_SIZE_BYTES chunks.  */
14463
14464  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14465    {
14466      /* Load words.  */
14467      if (src_aligned && interleave_factor > 1)
14468	{
14469	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14470					    TRUE, srcbase, &srcoffset));
14471	  src_autoinc += UNITS_PER_WORD * interleave_factor;
14472	}
14473      else
14474	{
14475	  for (j = 0; j < interleave_factor; j++)
14476	    {
14477	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14478						 - src_autoinc));
14479	      mem = adjust_automodify_address (srcbase, SImode, addr,
14480					       srcoffset + j * UNITS_PER_WORD);
14481	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
14482	    }
14483	  srcoffset += block_size_bytes;
14484	}
14485
14486      /* Store words.  */
14487      if (dst_aligned && interleave_factor > 1)
14488	{
14489	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14490					     TRUE, dstbase, &dstoffset));
14491	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
14492	}
14493      else
14494	{
14495	  for (j = 0; j < interleave_factor; j++)
14496	    {
14497	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14498						 - dst_autoinc));
14499	      mem = adjust_automodify_address (dstbase, SImode, addr,
14500					       dstoffset + j * UNITS_PER_WORD);
14501	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
14502	    }
14503	  dstoffset += block_size_bytes;
14504	}
14505
14506      remaining -= block_size_bytes;
14507    }
14508
14509  /* Copy any whole words left (note these aren't interleaved with any
14510     subsequent halfword/byte load/stores in the interests of simplicity).  */
14511
14512  words = remaining / UNITS_PER_WORD;
14513
14514  gcc_assert (words < interleave_factor);
14515
14516  if (src_aligned && words > 1)
14517    {
14518      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14519					&srcoffset));
14520      src_autoinc += UNITS_PER_WORD * words;
14521    }
14522  else
14523    {
14524      for (j = 0; j < words; j++)
14525	{
14526	  addr = plus_constant (Pmode, src,
14527				srcoffset + j * UNITS_PER_WORD - src_autoinc);
14528	  mem = adjust_automodify_address (srcbase, SImode, addr,
14529					   srcoffset + j * UNITS_PER_WORD);
14530	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
14531	}
14532      srcoffset += words * UNITS_PER_WORD;
14533    }
14534
14535  if (dst_aligned && words > 1)
14536    {
14537      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14538					 &dstoffset));
14539      dst_autoinc += words * UNITS_PER_WORD;
14540    }
14541  else
14542    {
14543      for (j = 0; j < words; j++)
14544	{
14545	  addr = plus_constant (Pmode, dst,
14546				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14547	  mem = adjust_automodify_address (dstbase, SImode, addr,
14548					   dstoffset + j * UNITS_PER_WORD);
14549	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
14550	}
14551      dstoffset += words * UNITS_PER_WORD;
14552    }
14553
14554  remaining -= words * UNITS_PER_WORD;
14555
14556  gcc_assert (remaining < 4);
14557
14558  /* Copy a halfword if necessary.  */
14559
14560  if (remaining >= 2)
14561    {
14562      halfword_tmp = gen_reg_rtx (SImode);
14563
14564      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14565      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14566      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14567
14568      /* Either write out immediately, or delay until we've loaded the last
14569	 byte, depending on interleave factor.  */
14570      if (interleave_factor == 1)
14571	{
14572	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14573	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14574	  emit_insn (gen_unaligned_storehi (mem,
14575		       gen_lowpart (HImode, halfword_tmp)));
14576	  halfword_tmp = NULL;
14577	  dstoffset += 2;
14578	}
14579
14580      remaining -= 2;
14581      srcoffset += 2;
14582    }
14583
14584  gcc_assert (remaining < 2);
14585
14586  /* Copy last byte.  */
14587
14588  if ((remaining & 1) != 0)
14589    {
14590      byte_tmp = gen_reg_rtx (SImode);
14591
14592      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14593      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14594      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14595
14596      if (interleave_factor == 1)
14597	{
14598	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14599	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14600	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14601	  byte_tmp = NULL;
14602	  dstoffset++;
14603	}
14604
14605      remaining--;
14606      srcoffset++;
14607    }
14608
14609  /* Store last halfword if we haven't done so already.  */
14610
14611  if (halfword_tmp)
14612    {
14613      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14614      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14615      emit_insn (gen_unaligned_storehi (mem,
14616		   gen_lowpart (HImode, halfword_tmp)));
14617      dstoffset += 2;
14618    }
14619
14620  /* Likewise for last byte.  */
14621
14622  if (byte_tmp)
14623    {
14624      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14625      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14626      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14627      dstoffset++;
14628    }
14629
14630  gcc_assert (remaining == 0 && srcoffset == dstoffset);
14631}
14632
14633/* From mips_adjust_block_mem:
14634
14635   Helper function for doing a loop-based block operation on memory
14636   reference MEM.  Each iteration of the loop will operate on LENGTH
14637   bytes of MEM.
14638
14639   Create a new base register for use within the loop and point it to
14640   the start of MEM.  Create a new memory reference that uses this
14641   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
14642
14643static void
14644arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14645		      rtx *loop_mem)
14646{
14647  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14648
14649  /* Although the new mem does not refer to a known location,
14650     it does keep up to LENGTH bytes of alignment.  */
14651  *loop_mem = change_address (mem, BLKmode, *loop_reg);
14652  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14653}
14654
14655/* From mips_block_move_loop:
14656
14657   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14658   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
14659   the memory regions do not overlap.  */
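/* Sketch of the emitted structure (illustrative): for LENGTH = 40 and
   BYTES_PER_ITER = 16 the code below produces

       final_src = src_reg + 32
     loop:
       <straight copy of 16 bytes>
       src_reg  += 16
       dest_reg += 16
       if (src_reg != final_src) goto loop
       <straight copy of the 8 leftover bytes>

   i.e. the loop covers the largest multiple of BYTES_PER_ITER and
   arm_block_move_unaligned_straight mops up the remainder.  */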
14660
14661static void
14662arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14663			       unsigned int interleave_factor,
14664			       HOST_WIDE_INT bytes_per_iter)
14665{
14666  rtx src_reg, dest_reg, final_src, test;
14667  HOST_WIDE_INT leftover;
14668
14669  leftover = length % bytes_per_iter;
14670  length -= leftover;
14671
14672  /* Create registers and memory references for use within the loop.  */
14673  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14674  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14675
14676  /* Calculate the value that SRC_REG should have after the last iteration of
14677     the loop.  */
14678  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14679				   0, 0, OPTAB_WIDEN);
14680
14681  /* Emit the start of the loop.  */
14682  rtx_code_label *label = gen_label_rtx ();
14683  emit_label (label);
14684
14685  /* Emit the loop body.  */
14686  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14687				     interleave_factor);
14688
14689  /* Move on to the next block.  */
14690  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14691  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14692
14693  /* Emit the loop condition.  */
14694  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14695  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14696
14697  /* Mop up any left-over bytes.  */
14698  if (leftover)
14699    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14700}
14701
14702/* Emit a block move when either the source or destination is unaligned (not
14703   aligned to a four-byte boundary).  This may need further tuning depending on
14704   core type, optimize_size setting, etc.  */
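/* Worked example of the thresholds below (illustrative): when not
   optimizing for size, a 40-byte unaligned copy (length > 32) uses
   arm_block_move_unaligned_loop with an interleave factor of 4 and 16
   bytes per iteration, while a 24-byte copy is expanded straight-line
   with the same interleave factor.  With -Os the loop threshold drops to
   12 bytes and the interleave factor to 2 (one side aligned) or 1.  */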
14705
14706static int
14707arm_movmemqi_unaligned (rtx *operands)
14708{
14709  HOST_WIDE_INT length = INTVAL (operands[2]);
14710
14711  if (optimize_size)
14712    {
14713      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14714      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14715      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14716	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
14717	 or dst_aligned though: allow more interleaving in those cases since the
14718	 resulting code can be smaller.  */
14719      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14720      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14721
14722      if (length > 12)
14723	arm_block_move_unaligned_loop (operands[0], operands[1], length,
14724				       interleave_factor, bytes_per_iter);
14725      else
14726	arm_block_move_unaligned_straight (operands[0], operands[1], length,
14727					   interleave_factor);
14728    }
14729  else
14730    {
14731      /* Note that the loop created by arm_block_move_unaligned_loop may be
14732	 subject to loop unrolling, which makes tuning this condition a little
14733	 redundant.  */
14734      if (length > 32)
14735	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14736      else
14737	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14738    }
14739
14740  return 1;
14741}
14742
14743int
14744arm_gen_movmemqi (rtx *operands)
14745{
14746  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14747  HOST_WIDE_INT srcoffset, dstoffset;
14748  int i;
14749  rtx src, dst, srcbase, dstbase;
14750  rtx part_bytes_reg = NULL;
14751  rtx mem;
14752
14753  if (!CONST_INT_P (operands[2])
14754      || !CONST_INT_P (operands[3])
14755      || INTVAL (operands[2]) > 64)
14756    return 0;
14757
14758  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14759    return arm_movmemqi_unaligned (operands);
14760
14761  if (INTVAL (operands[3]) & 3)
14762    return 0;
14763
14764  dstbase = operands[0];
14765  srcbase = operands[1];
14766
14767  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14768  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14769
14770  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14771  out_words_to_go = INTVAL (operands[2]) / 4;
14772  last_bytes = INTVAL (operands[2]) & 3;
14773  dstoffset = srcoffset = 0;
14774
14775  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14776    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14777
14778  for (i = 0; in_words_to_go >= 2; i+=4)
14779    {
14780      if (in_words_to_go > 4)
14781	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14782					  TRUE, srcbase, &srcoffset));
14783      else
14784	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14785					  src, FALSE, srcbase,
14786					  &srcoffset));
14787
14788      if (out_words_to_go)
14789	{
14790	  if (out_words_to_go > 4)
14791	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14792					       TRUE, dstbase, &dstoffset));
14793	  else if (out_words_to_go != 1)
14794	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14795					       out_words_to_go, dst,
14796					       (last_bytes == 0
14797						? FALSE : TRUE),
14798					       dstbase, &dstoffset));
14799	  else
14800	    {
14801	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14802	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14803	      if (last_bytes != 0)
14804		{
14805		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14806		  dstoffset += 4;
14807		}
14808	    }
14809	}
14810
14811      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14812      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14813    }
14814
14815  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
14816  if (out_words_to_go)
14817    {
14818      rtx sreg;
14819
14820      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14821      sreg = copy_to_reg (mem);
14822
14823      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14824      emit_move_insn (mem, sreg);
14825      in_words_to_go--;
14826
14827      gcc_assert (!in_words_to_go);	/* Sanity check */
14828    }
14829
14830  if (in_words_to_go)
14831    {
14832      gcc_assert (in_words_to_go > 0);
14833
14834      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14835      part_bytes_reg = copy_to_mode_reg (SImode, mem);
14836    }
14837
14838  gcc_assert (!last_bytes || part_bytes_reg);
14839
14840  if (BYTES_BIG_ENDIAN && last_bytes)
14841    {
14842      rtx tmp = gen_reg_rtx (SImode);
14843
14844      /* The bytes we want are in the top end of the word.  */
14845      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14846			      GEN_INT (8 * (4 - last_bytes))));
14847      part_bytes_reg = tmp;
14848
14849      while (last_bytes)
14850	{
14851	  mem = adjust_automodify_address (dstbase, QImode,
14852					   plus_constant (Pmode, dst,
14853							  last_bytes - 1),
14854					   dstoffset + last_bytes - 1);
14855	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14856
14857	  if (--last_bytes)
14858	    {
14859	      tmp = gen_reg_rtx (SImode);
14860	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14861	      part_bytes_reg = tmp;
14862	    }
14863	}
14864
14865    }
14866  else
14867    {
14868      if (last_bytes > 1)
14869	{
14870	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14871	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14872	  last_bytes -= 2;
14873	  if (last_bytes)
14874	    {
14875	      rtx tmp = gen_reg_rtx (SImode);
14876	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14877	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14878	      part_bytes_reg = tmp;
14879	      dstoffset += 2;
14880	    }
14881	}
14882
14883      if (last_bytes)
14884	{
14885	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14886	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14887	}
14888    }
14889
14890  return 1;
14891}
14892
/* Helper for gen_movmem_ldrd_strd.  Increase the address of the memory
   rtx MEM by the size of its mode.  */
14895inline static rtx
14896next_consecutive_mem (rtx mem)
14897{
14898  machine_mode mode = GET_MODE (mem);
14899  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14900  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14901
14902  return adjust_automodify_address (mem, mode, addr, offset);
14903}
14904
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
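/* Worked example (illustrative): an aligned 15-byte copy is expanded by
   the code below into one doubleword move (LDRD/STRD where available),
   one word move, one halfword move and a final byte move, i.e.
   8 + 4 + 2 + 1 bytes.  */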
14907bool
14908gen_movmem_ldrd_strd (rtx *operands)
14909{
14910  unsigned HOST_WIDE_INT len;
14911  HOST_WIDE_INT align;
14912  rtx src, dst, base;
14913  rtx reg0;
14914  bool src_aligned, dst_aligned;
14915  bool src_volatile, dst_volatile;
14916
14917  gcc_assert (CONST_INT_P (operands[2]));
14918  gcc_assert (CONST_INT_P (operands[3]));
14919
14920  len = UINTVAL (operands[2]);
14921  if (len > 64)
14922    return false;
14923
14924  /* Maximum alignment we can assume for both src and dst buffers.  */
14925  align = INTVAL (operands[3]);
14926
14927  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14928    return false;
14929
14930  /* Place src and dst addresses in registers
14931     and update the corresponding mem rtx.  */
14932  dst = operands[0];
14933  dst_volatile = MEM_VOLATILE_P (dst);
14934  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14935  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14936  dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14937
14938  src = operands[1];
14939  src_volatile = MEM_VOLATILE_P (src);
14940  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14941  base = copy_to_mode_reg (SImode, XEXP (src, 0));
14942  src = adjust_automodify_address (src, VOIDmode, base, 0);
14943
14944  if (!unaligned_access && !(src_aligned && dst_aligned))
14945    return false;
14946
14947  if (src_volatile || dst_volatile)
14948    return false;
14949
14950  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
14951  if (!(dst_aligned || src_aligned))
14952    return arm_gen_movmemqi (operands);
14953
14954  src = adjust_address (src, DImode, 0);
14955  dst = adjust_address (dst, DImode, 0);
14956  while (len >= 8)
14957    {
14958      len -= 8;
14959      reg0 = gen_reg_rtx (DImode);
14960      if (src_aligned)
14961        emit_move_insn (reg0, src);
14962      else
14963        emit_insn (gen_unaligned_loaddi (reg0, src));
14964
14965      if (dst_aligned)
14966        emit_move_insn (dst, reg0);
14967      else
14968        emit_insn (gen_unaligned_storedi (dst, reg0));
14969
14970      src = next_consecutive_mem (src);
14971      dst = next_consecutive_mem (dst);
14972    }
14973
14974  gcc_assert (len < 8);
14975  if (len >= 4)
14976    {
14977      /* More than a word but less than a double-word to copy.  Copy a word.  */
14978      reg0 = gen_reg_rtx (SImode);
14979      src = adjust_address (src, SImode, 0);
14980      dst = adjust_address (dst, SImode, 0);
14981      if (src_aligned)
14982        emit_move_insn (reg0, src);
14983      else
14984        emit_insn (gen_unaligned_loadsi (reg0, src));
14985
14986      if (dst_aligned)
14987        emit_move_insn (dst, reg0);
14988      else
14989        emit_insn (gen_unaligned_storesi (dst, reg0));
14990
14991      src = next_consecutive_mem (src);
14992      dst = next_consecutive_mem (dst);
14993      len -= 4;
14994    }
14995
14996  if (len == 0)
14997    return true;
14998
14999  /* Copy the remaining bytes.  */
15000  if (len >= 2)
15001    {
15002      dst = adjust_address (dst, HImode, 0);
15003      src = adjust_address (src, HImode, 0);
15004      reg0 = gen_reg_rtx (SImode);
15005      if (src_aligned)
15006        emit_insn (gen_zero_extendhisi2 (reg0, src));
15007      else
15008        emit_insn (gen_unaligned_loadhiu (reg0, src));
15009
15010      if (dst_aligned)
15011        emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15012      else
15013        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15014
15015      src = next_consecutive_mem (src);
15016      dst = next_consecutive_mem (dst);
15017      if (len == 2)
15018        return true;
15019    }
15020
15021  dst = adjust_address (dst, QImode, 0);
15022  src = adjust_address (src, QImode, 0);
15023  reg0 = gen_reg_rtx (QImode);
15024  emit_move_insn (reg0, src);
15025  emit_move_insn (dst, reg0);
15026  return true;
15027}
15028
15029/* Select a dominance comparison mode if possible for a test of the general
15030   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
15031   COND_OR == DOM_CC_X_AND_Y => (X && Y)
15032   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15033   COND_OR == DOM_CC_X_OR_Y => (X || Y)
15034   In all cases OP will be either EQ or NE, but we don't need to know which
15035   here.  If we are unable to support a dominance comparison we return
15036   CC mode.  This will then fail to match for the RTL expressions that
15037   generate this call.  */
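/* Example (illustrative): for (LT x 0) || (LE y 0) with COND_OR ==
   DOM_CC_X_OR_Y, LT dominates LE (LT true implies LE true), so the
   combined test only needs the weaker condition and the function returns
   CC_DLEmode.  If neither condition dominates the other (say LT and GEU),
   CCmode is returned and the enclosing pattern fails to match.  */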
15038machine_mode
15039arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15040{
15041  enum rtx_code cond1, cond2;
15042  int swapped = 0;
15043
15044  /* Currently we will probably get the wrong result if the individual
15045     comparisons are not simple.  This also ensures that it is safe to
15046     reverse a comparison if necessary.  */
15047  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15048       != CCmode)
15049      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15050	  != CCmode))
15051    return CCmode;
15052
15053  /* The if_then_else variant of this tests the second condition if the
15054     first passes, but is true if the first fails.  Reverse the first
15055     condition to get a true "inclusive-or" expression.  */
15056  if (cond_or == DOM_CC_NX_OR_Y)
15057    cond1 = reverse_condition (cond1);
15058
15059  /* If the comparisons are not equal, and one doesn't dominate the other,
15060     then we can't do this.  */
15061  if (cond1 != cond2
15062      && !comparison_dominates_p (cond1, cond2)
15063      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15064    return CCmode;
15065
15066  if (swapped)
15067    std::swap (cond1, cond2);
15068
15069  switch (cond1)
15070    {
15071    case EQ:
15072      if (cond_or == DOM_CC_X_AND_Y)
15073	return CC_DEQmode;
15074
15075      switch (cond2)
15076	{
15077	case EQ: return CC_DEQmode;
15078	case LE: return CC_DLEmode;
15079	case LEU: return CC_DLEUmode;
15080	case GE: return CC_DGEmode;
15081	case GEU: return CC_DGEUmode;
15082	default: gcc_unreachable ();
15083	}
15084
15085    case LT:
15086      if (cond_or == DOM_CC_X_AND_Y)
15087	return CC_DLTmode;
15088
15089      switch (cond2)
15090	{
15091	case  LT:
15092	    return CC_DLTmode;
15093	case LE:
15094	  return CC_DLEmode;
15095	case NE:
15096	  return CC_DNEmode;
15097	default:
15098	  gcc_unreachable ();
15099	}
15100
15101    case GT:
15102      if (cond_or == DOM_CC_X_AND_Y)
15103	return CC_DGTmode;
15104
15105      switch (cond2)
15106	{
15107	case GT:
15108	  return CC_DGTmode;
15109	case GE:
15110	  return CC_DGEmode;
15111	case NE:
15112	  return CC_DNEmode;
15113	default:
15114	  gcc_unreachable ();
15115	}
15116
15117    case LTU:
15118      if (cond_or == DOM_CC_X_AND_Y)
15119	return CC_DLTUmode;
15120
15121      switch (cond2)
15122	{
15123	case LTU:
15124	  return CC_DLTUmode;
15125	case LEU:
15126	  return CC_DLEUmode;
15127	case NE:
15128	  return CC_DNEmode;
15129	default:
15130	  gcc_unreachable ();
15131	}
15132
15133    case GTU:
15134      if (cond_or == DOM_CC_X_AND_Y)
15135	return CC_DGTUmode;
15136
15137      switch (cond2)
15138	{
15139	case GTU:
15140	  return CC_DGTUmode;
15141	case GEU:
15142	  return CC_DGEUmode;
15143	case NE:
15144	  return CC_DNEmode;
15145	default:
15146	  gcc_unreachable ();
15147	}
15148
15149    /* The remaining cases only occur when both comparisons are the
15150       same.  */
15151    case NE:
15152      gcc_assert (cond1 == cond2);
15153      return CC_DNEmode;
15154
15155    case LE:
15156      gcc_assert (cond1 == cond2);
15157      return CC_DLEmode;
15158
15159    case GE:
15160      gcc_assert (cond1 == cond2);
15161      return CC_DGEmode;
15162
15163    case LEU:
15164      gcc_assert (cond1 == cond2);
15165      return CC_DLEUmode;
15166
15167    case GEU:
15168      gcc_assert (cond1 == cond2);
15169      return CC_DGEUmode;
15170
15171    default:
15172      gcc_unreachable ();
15173    }
15174}
15175
15176machine_mode
15177arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15178{
  /* All floating point compares return CCFP if only a non-signalling
     result is needed (equality and the unordered comparisons), and
     CCFPE otherwise.  */
15181  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15182    {
15183      switch (op)
15184	{
15185	case EQ:
15186	case NE:
15187	case UNORDERED:
15188	case ORDERED:
15189	case UNLT:
15190	case UNLE:
15191	case UNGT:
15192	case UNGE:
15193	case UNEQ:
15194	case LTGT:
15195	  return CCFPmode;
15196
15197	case LT:
15198	case LE:
15199	case GT:
15200	case GE:
15201	  return CCFPEmode;
15202
15203	default:
15204	  gcc_unreachable ();
15205	}
15206    }
15207
15208  /* A compare with a shifted operand.  Because of canonicalization, the
15209     comparison will have to be swapped when we emit the assembler.  */
15210  if (GET_MODE (y) == SImode
15211      && (REG_P (y) || (GET_CODE (y) == SUBREG))
15212      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15213	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15214	  || GET_CODE (x) == ROTATERT))
15215    return CC_SWPmode;
15216
15217  /* This operation is performed swapped, but since we only rely on the Z
15218     flag we don't need an additional mode.  */
15219  if (GET_MODE (y) == SImode
15220      && (REG_P (y) || (GET_CODE (y) == SUBREG))
15221      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
15223    return CC_Zmode;
15224
15225  /* This is a special case that is used by combine to allow a
15226     comparison of a shifted byte load to be split into a zero-extend
15227     followed by a comparison of the shifted integer (only valid for
15228     equalities and unsigned inequalities).  */
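  /* For instance, (eq (ashift:SI (subreg:SI (mem:QI addr) 0) (const_int 24))
     (const_int 0x2a000000)) is handled here: the loaded byte 0x2a, already
     shifted into the top byte, is compared directly against the constant.  */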
15229  if (GET_MODE (x) == SImode
15230      && GET_CODE (x) == ASHIFT
15231      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15232      && GET_CODE (XEXP (x, 0)) == SUBREG
15233      && MEM_P (SUBREG_REG (XEXP (x, 0)))
15234      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15235      && (op == EQ || op == NE
15236	  || op == GEU || op == GTU || op == LTU || op == LEU)
15237      && CONST_INT_P (y))
15238    return CC_Zmode;
15239
15240  /* A construct for a conditional compare, if the false arm contains
15241     0, then both conditions must be true, otherwise either condition
15242     must be true.  Not all conditions are possible, so CCmode is
15243     returned if it can't be done.  */
15244  if (GET_CODE (x) == IF_THEN_ELSE
15245      && (XEXP (x, 2) == const0_rtx
15246	  || XEXP (x, 2) == const1_rtx)
15247      && COMPARISON_P (XEXP (x, 0))
15248      && COMPARISON_P (XEXP (x, 1)))
15249    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15250					 INTVAL (XEXP (x, 2)));
15251
15252  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
15253  if (GET_CODE (x) == AND
15254      && (op == EQ || op == NE)
15255      && COMPARISON_P (XEXP (x, 0))
15256      && COMPARISON_P (XEXP (x, 1)))
15257    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15258					 DOM_CC_X_AND_Y);
15259
15260  if (GET_CODE (x) == IOR
15261      && (op == EQ || op == NE)
15262      && COMPARISON_P (XEXP (x, 0))
15263      && COMPARISON_P (XEXP (x, 1)))
15264    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15265					 DOM_CC_X_OR_Y);
15266
15267  /* An operation (on Thumb) where we want to test for a single bit.
15268     This is done by shifting that bit up into the top bit of a
15269     scratch register; we can then branch on the sign bit.  */
15270  if (TARGET_THUMB1
15271      && GET_MODE (x) == SImode
15272      && (op == EQ || op == NE)
15273      && GET_CODE (x) == ZERO_EXTRACT
15274      && XEXP (x, 1) == const1_rtx)
15275    return CC_Nmode;
15276
15277  /* An operation that sets the condition codes as a side-effect, the
15278     V flag is not set correctly, so we can only use comparisons where
15279     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
15280     instead.)  */
15281  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
15282  if (GET_MODE (x) == SImode
15283      && y == const0_rtx
15284      && (op == EQ || op == NE || op == LT || op == GE)
15285      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15286	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
15287	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15288	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15289	  || GET_CODE (x) == LSHIFTRT
15290	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15291	  || GET_CODE (x) == ROTATERT
15292	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15293    return CC_NOOVmode;
15294
15295  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15296    return CC_Zmode;
15297
  /* An addition compared unsigned against one of its own operands only
     needs the carry flag: e.g. (ltu (plus a b) a) is true iff the addition
     wrapped round.  */
  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;
15302
15303  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15304    {
15305      switch (op)
15306	{
15307	case EQ:
15308	case NE:
15309	  /* A DImode comparison against zero can be implemented by
15310	     or'ing the two halves together.  */
15311	  if (y == const0_rtx)
15312	    return CC_Zmode;
15313
15314	  /* We can do an equality test in three Thumb instructions.  */
15315	  if (!TARGET_32BIT)
15316	    return CC_Zmode;
15317
15318	  /* FALLTHROUGH */
15319
15320	case LTU:
15321	case LEU:
15322	case GTU:
15323	case GEU:
15324	  /* DImode unsigned comparisons can be implemented by cmp +
15325	     cmpeq without a scratch register.  Not worth doing in
15326	     Thumb-2.  */
15327	  if (TARGET_32BIT)
15328	    return CC_CZmode;
15329
15330	  /* FALLTHROUGH */
15331
15332	case LT:
15333	case LE:
15334	case GT:
15335	case GE:
15336	  /* DImode signed and unsigned comparisons can be implemented
15337	     by cmp + sbcs with a scratch register, but that does not
15338	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
15339	  gcc_assert (op != EQ && op != NE);
15340	  return CC_NCVmode;
15341
15342	default:
15343	  gcc_unreachable ();
15344	}
15345    }
15346
15347  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15348    return GET_MODE (x);
15349
15350  return CCmode;
15351}
15352
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  SCRATCH, when non-null,
   supplies an SImode scratch register for DImode comparisons once reload has
   completed.  */
15356rtx
15357arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15358{
15359  machine_mode mode;
15360  rtx cc_reg;
15361  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15362
15363  /* We might have X as a constant, Y as a register because of the predicates
15364     used for cmpdi.  If so, force X to a register here.  */
15365  if (dimode_comparison && !REG_P (x))
15366    x = force_reg (DImode, x);
15367
15368  mode = SELECT_CC_MODE (code, x, y);
15369  cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15370
15371  if (dimode_comparison
15372      && mode != CC_CZmode)
15373    {
15374      rtx clobber, set;
15375
15376      /* To compare two non-zero values for equality, XOR them and
15377	 then compare against zero.  Not used for ARM mode; there
15378	 CC_CZmode is cheaper.  */
15379      if (mode == CC_Zmode && y != const0_rtx)
15380	{
15381	  gcc_assert (!reload_completed);
15382	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15383	  y = const0_rtx;
15384	}
15385
15386      /* A scratch register is required.  */
15387      if (reload_completed)
15388	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15389      else
15390	scratch = gen_rtx_SCRATCH (SImode);
15391
15392      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15393      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15394      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15395    }
15396  else
15397    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15398
15399  return cc_reg;
15400}
15401
15402/* Generate a sequence of insns that will generate the correct return
15403   address mask depending on the physical architecture that the program
15404   is running on.  */
15405rtx
15406arm_gen_return_addr_mask (void)
15407{
15408  rtx reg = gen_reg_rtx (Pmode);
15409
15410  emit_insn (gen_return_addr_mask (reg));
15411  return reg;
15412}
15413
15414void
15415arm_reload_in_hi (rtx *operands)
15416{
15417  rtx ref = operands[1];
15418  rtx base, scratch;
15419  HOST_WIDE_INT offset = 0;
15420
15421  if (GET_CODE (ref) == SUBREG)
15422    {
15423      offset = SUBREG_BYTE (ref);
15424      ref = SUBREG_REG (ref);
15425    }
15426
15427  if (REG_P (ref))
15428    {
15429      /* We have a pseudo which has been spilt onto the stack; there
15430	 are two cases here: the first where there is a simple
15431	 stack-slot replacement and a second where the stack-slot is
15432	 out of range, or is used as a subreg.  */
15433      if (reg_equiv_mem (REGNO (ref)))
15434	{
15435	  ref = reg_equiv_mem (REGNO (ref));
15436	  base = find_replacement (&XEXP (ref, 0));
15437	}
15438      else
15439	/* The slot is out of range, or was dressed up in a SUBREG.  */
15440	base = reg_equiv_address (REGNO (ref));
15441    }
15442  else
15443    base = find_replacement (&XEXP (ref, 0));
15444
15445  /* Handle the case where the address is too complex to be offset by 1.  */
15446  if (GET_CODE (base) == MINUS
15447      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15448    {
15449      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15450
15451      emit_set_insn (base_plus, base);
15452      base = base_plus;
15453    }
15454  else if (GET_CODE (base) == PLUS)
15455    {
15456      /* The addend must be CONST_INT, or we would have dealt with it above.  */
15457      HOST_WIDE_INT hi, lo;
15458
15459      offset += INTVAL (XEXP (base, 1));
15460      base = XEXP (base, 0);
15461
15462      /* Rework the address into a legal sequence of insns.  */
15463      /* Valid range for lo is -4095 -> 4095 */
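      /* For example, offset 0x1234 splits into lo = 0x234 and hi = 0x1000;
	 the corner case offset 4095 becomes lo = 2047 and hi = 2048, so that
	 lo + 1 still fits in the immediate field.  */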
15464      lo = (offset >= 0
15465	    ? (offset & 0xfff)
15466	    : -((-offset) & 0xfff));
15467
15468      /* Corner case, if lo is the max offset then we would be out of range
15469	 once we have added the additional 1 below, so bump the msb into the
15470	 pre-loading insn(s).  */
15471      if (lo == 4095)
15472	lo &= 0x7ff;
15473
15474      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15475	     ^ (HOST_WIDE_INT) 0x80000000)
15476	    - (HOST_WIDE_INT) 0x80000000);
15477
15478      gcc_assert (hi + lo == offset);
15479
15480      if (hi != 0)
15481	{
15482	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15483
15484	  /* Get the base address; addsi3 knows how to handle constants
15485	     that require more than one insn.  */
15486	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15487	  base = base_plus;
15488	  offset = lo;
15489	}
15490    }
15491
  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]); that's why we asked for a DImode reg -- so we can
     use the half that does not overlap.  */
15495  if (REGNO (operands[2]) == REGNO (operands[0]))
15496    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15497  else
15498    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15499
15500  emit_insn (gen_zero_extendqisi2 (scratch,
15501				   gen_rtx_MEM (QImode,
15502						plus_constant (Pmode, base,
15503							       offset))));
15504  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15505				   gen_rtx_MEM (QImode,
15506						plus_constant (Pmode, base,
15507							       offset + 1))));
15508  if (!BYTES_BIG_ENDIAN)
15509    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15510		   gen_rtx_IOR (SImode,
15511				gen_rtx_ASHIFT
15512				(SImode,
15513				 gen_rtx_SUBREG (SImode, operands[0], 0),
15514				 GEN_INT (8)),
15515				scratch));
15516  else
15517    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15518		   gen_rtx_IOR (SImode,
15519				gen_rtx_ASHIFT (SImode, scratch,
15520						GEN_INT (8)),
15521				gen_rtx_SUBREG (SImode, operands[0], 0)));
15522}
15523
/* Handle storing a half-word to memory during reload by synthesizing it as
   two byte stores.  Take care not to clobber the input values until after we
15526   have moved them somewhere safe.  This code assumes that if the DImode
15527   scratch in operands[2] overlaps either the input value or output address
15528   in some way, then that value must die in this insn (we absolutely need
15529   two scratch registers for some corner cases).  */
15530void
15531arm_reload_out_hi (rtx *operands)
15532{
15533  rtx ref = operands[0];
15534  rtx outval = operands[1];
15535  rtx base, scratch;
15536  HOST_WIDE_INT offset = 0;
15537
15538  if (GET_CODE (ref) == SUBREG)
15539    {
15540      offset = SUBREG_BYTE (ref);
15541      ref = SUBREG_REG (ref);
15542    }
15543
15544  if (REG_P (ref))
15545    {
15546      /* We have a pseudo which has been spilt onto the stack; there
15547	 are two cases here: the first where there is a simple
15548	 stack-slot replacement and a second where the stack-slot is
15549	 out of range, or is used as a subreg.  */
15550      if (reg_equiv_mem (REGNO (ref)))
15551	{
15552	  ref = reg_equiv_mem (REGNO (ref));
15553	  base = find_replacement (&XEXP (ref, 0));
15554	}
15555      else
15556	/* The slot is out of range, or was dressed up in a SUBREG.  */
15557	base = reg_equiv_address (REGNO (ref));
15558    }
15559  else
15560    base = find_replacement (&XEXP (ref, 0));
15561
15562  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15563
15564  /* Handle the case where the address is too complex to be offset by 1.  */
15565  if (GET_CODE (base) == MINUS
15566      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15567    {
15568      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15569
15570      /* Be careful not to destroy OUTVAL.  */
15571      if (reg_overlap_mentioned_p (base_plus, outval))
15572	{
15573	  /* Updating base_plus might destroy outval, see if we can
15574	     swap the scratch and base_plus.  */
15575	  if (!reg_overlap_mentioned_p (scratch, outval))
15576	    std::swap (scratch, base_plus);
15577	  else
15578	    {
15579	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15580
15581	      /* Be conservative and copy OUTVAL into the scratch now,
15582		 this should only be necessary if outval is a subreg
15583		 of something larger than a word.  */
15584	      /* XXX Might this clobber base?  I can't see how it can,
15585		 since scratch is known to overlap with OUTVAL, and
15586		 must be wider than a word.  */
15587	      emit_insn (gen_movhi (scratch_hi, outval));
15588	      outval = scratch_hi;
15589	    }
15590	}
15591
15592      emit_set_insn (base_plus, base);
15593      base = base_plus;
15594    }
15595  else if (GET_CODE (base) == PLUS)
15596    {
15597      /* The addend must be CONST_INT, or we would have dealt with it above.  */
15598      HOST_WIDE_INT hi, lo;
15599
15600      offset += INTVAL (XEXP (base, 1));
15601      base = XEXP (base, 0);
15602
15603      /* Rework the address into a legal sequence of insns.  */
15604      /* Valid range for lo is -4095 -> 4095 */
15605      lo = (offset >= 0
15606	    ? (offset & 0xfff)
15607	    : -((-offset) & 0xfff));
15608
15609      /* Corner case, if lo is the max offset then we would be out of range
15610	 once we have added the additional 1 below, so bump the msb into the
15611	 pre-loading insn(s).  */
15612      if (lo == 4095)
15613	lo &= 0x7ff;
15614
15615      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15616	     ^ (HOST_WIDE_INT) 0x80000000)
15617	    - (HOST_WIDE_INT) 0x80000000);
15618
15619      gcc_assert (hi + lo == offset);
15620
15621      if (hi != 0)
15622	{
15623	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15624
15625	  /* Be careful not to destroy OUTVAL.  */
15626	  if (reg_overlap_mentioned_p (base_plus, outval))
15627	    {
15628	      /* Updating base_plus might destroy outval, see if we
15629		 can swap the scratch and base_plus.  */
15630	      if (!reg_overlap_mentioned_p (scratch, outval))
15631	        std::swap (scratch, base_plus);
15632	      else
15633		{
15634		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15635
15636		  /* Be conservative and copy outval into scratch now,
15637		     this should only be necessary if outval is a
15638		     subreg of something larger than a word.  */
15639		  /* XXX Might this clobber base?  I can't see how it
15640		     can, since scratch is known to overlap with
15641		     outval.  */
15642		  emit_insn (gen_movhi (scratch_hi, outval));
15643		  outval = scratch_hi;
15644		}
15645	    }
15646
15647	  /* Get the base address; addsi3 knows how to handle constants
15648	     that require more than one insn.  */
15649	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15650	  base = base_plus;
15651	  offset = lo;
15652	}
15653    }
15654
15655  if (BYTES_BIG_ENDIAN)
15656    {
15657      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15658					 plus_constant (Pmode, base,
15659							offset + 1)),
15660			    gen_lowpart (QImode, outval)));
15661      emit_insn (gen_lshrsi3 (scratch,
15662			      gen_rtx_SUBREG (SImode, outval, 0),
15663			      GEN_INT (8)));
15664      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15665								offset)),
15666			    gen_lowpart (QImode, scratch)));
15667    }
15668  else
15669    {
15670      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15671								offset)),
15672			    gen_lowpart (QImode, outval)));
15673      emit_insn (gen_lshrsi3 (scratch,
15674			      gen_rtx_SUBREG (SImode, outval, 0),
15675			      GEN_INT (8)));
15676      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15677					 plus_constant (Pmode, base,
15678							offset + 1)),
15679			    gen_lowpart (QImode, scratch)));
15680    }
15681}
15682
15683/* Return true if a type must be passed in memory. For AAPCS, small aggregates
15684   (padded to the size of a word) should be passed in a register.  */
15685
15686static bool
15687arm_must_pass_in_stack (machine_mode mode, const_tree type)
15688{
15689  if (TARGET_AAPCS_BASED)
15690    return must_pass_in_stack_var_size (mode, type);
15691  else
15692    return must_pass_in_stack_var_size_or_pad (mode, type);
15693}
15694
15695
15696/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15697   Return true if an argument passed on the stack should be padded upwards,
15698   i.e. if the least-significant byte has useful data.
15699   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
15700   aggregate types are placed in the lowest memory address.  */
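/* For example, on a big-endian AAPCS target a 'short' argument passed on
   the stack is padded downward (this function returns false), whereas a
   three-byte structure is padded upward so that it starts at the lowest
   address.  */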
15701
15702bool
15703arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15704{
15705  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
15707
15708  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15709    return false;
15710
15711  return true;
15712}
15713
15714
15715/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15716   Return !BYTES_BIG_ENDIAN if the least significant byte of the
15717   register has useful data, and return the opposite if the most
15718   significant byte does.  */
15719
15720bool
15721arm_pad_reg_upward (machine_mode mode,
15722                    tree type, int first ATTRIBUTE_UNUSED)
15723{
15724  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15725    {
15726      /* For AAPCS, small aggregates, small fixed-point types,
15727	 and small complex types are always padded upwards.  */
15728      if (type)
15729	{
15730	  if ((AGGREGATE_TYPE_P (type)
15731	       || TREE_CODE (type) == COMPLEX_TYPE
15732	       || FIXED_POINT_TYPE_P (type))
15733	      && int_size_in_bytes (type) <= 4)
15734	    return true;
15735	}
15736      else
15737	{
15738	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15739	      && GET_MODE_SIZE (mode) <= 4)
15740	    return true;
15741	}
15742    }
15743
15744  /* Otherwise, use default padding.  */
15745  return !BYTES_BIG_ENDIAN;
15746}
15747
15748/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15749   assuming that the address in the base register is word aligned.  */
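/* For instance, in Thumb-2 state an offset of 1020 is accepted but 1022 is
   rejected (not a multiple of 4), while in ARM state the limit is +/-255.
   In Thumb-1 state LDRD/STRD are not available, so this always fails.  */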
15750bool
15751offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15752{
15753  HOST_WIDE_INT max_offset;
15754
15755  /* Offset must be a multiple of 4 in Thumb mode.  */
15756  if (TARGET_THUMB2 && ((offset & 3) != 0))
15757    return false;
15758
15759  if (TARGET_THUMB2)
15760    max_offset = 1020;
15761  else if (TARGET_ARM)
15762    max_offset = 255;
15763  else
15764    return false;
15765
15766  return ((offset <= max_offset) && (offset >= -max_offset));
15767}
15768
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG; this is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  The
   patterns guarantee that both memory accesses use the same base register,
   that the offsets are constants within range, and that the gap between the
   offsets is 4.  If reload is complete, also check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
15776bool
15777operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15778                       bool wback, bool load)
15779{
15780  unsigned int t, t2, n;
15781
15782  if (!reload_completed)
15783    return true;
15784
15785  if (!offset_ok_for_ldrd_strd (offset))
15786    return false;
15787
15788  t = REGNO (rt);
15789  t2 = REGNO (rt2);
15790  n = REGNO (rn);
15791
15792  if ((TARGET_THUMB2)
15793      && ((wback && (n == t || n == t2))
15794          || (t == SP_REGNUM)
15795          || (t == PC_REGNUM)
15796          || (t2 == SP_REGNUM)
15797          || (t2 == PC_REGNUM)
15798          || (!load && (n == PC_REGNUM))
15799          || (load && (t == t2))
15800          /* Triggers Cortex-M3 LDRD errata.  */
15801          || (!wback && load && fix_cm3_ldrd && (n == t))))
15802    return false;
15803
15804  if ((TARGET_ARM)
15805      && ((wback && (n == t || n == t2))
15806          || (t2 == PC_REGNUM)
15807          || (t % 2 != 0)   /* First destination register is not even.  */
15808          || (t2 != t + 1)
          /* PC can be used as the base register (for offset addressing
             only), but this is deprecated.  */
15811          || (n == PC_REGNUM)))
15812    return false;
15813
15814  return true;
15815}
15816
15817/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
15818   operand MEM's address contains an immediate offset from the base
15819   register and has no side effects, in which case it sets BASE and
15820   OFFSET accordingly.  */
15821static bool
15822mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15823{
15824  rtx addr;
15825
15826  gcc_assert (base != NULL && offset != NULL);
15827
15828  /* TODO: Handle more general memory operand patterns, such as
15829     PRE_DEC and PRE_INC.  */
15830
15831  if (side_effects_p (mem))
15832    return false;
15833
15834  /* Can't deal with subregs.  */
15835  if (GET_CODE (mem) == SUBREG)
15836    return false;
15837
15838  gcc_assert (MEM_P (mem));
15839
15840  *offset = const0_rtx;
15841
15842  addr = XEXP (mem, 0);
15843
15844  /* If addr isn't valid for DImode, then we can't handle it.  */
15845  if (!arm_legitimate_address_p (DImode, addr,
15846				 reload_in_progress || reload_completed))
15847    return false;
15848
15849  if (REG_P (addr))
15850    {
15851      *base = addr;
15852      return true;
15853    }
15854  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15855    {
15856      *base = XEXP (addr, 0);
15857      *offset = XEXP (addr, 1);
15858      return (REG_P (*base) && CONST_INT_P (*offset));
15859    }
15860
15861  return false;
15862}
15863
15864/* Called from a peephole2 to replace two word-size accesses with a
15865   single LDRD/STRD instruction.  Returns true iff we can generate a
15866   new instruction sequence.  That is, both accesses use the same base
15867   register and the gap between constant offsets is 4.  This function
15868   may reorder its operands to match ldrd/strd RTL templates.
15869   OPERANDS are the operands found by the peephole matcher;
15870   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is a load or a store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5]; the pattern is assumed to be
   four insns long for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
15876bool
15877gen_operands_ldrd_strd (rtx *operands, bool load,
15878                        bool const_store, bool commute)
15879{
15880  int nops = 2;
15881  HOST_WIDE_INT offsets[2], offset;
15882  rtx base = NULL_RTX;
15883  rtx cur_base, cur_offset, tmp;
15884  int i, gap;
15885  HARD_REG_SET regset;
15886
15887  gcc_assert (!const_store || !load);
15888  /* Check that the memory references are immediate offsets from the
15889     same base register.  Extract the base register, the destination
15890     registers, and the corresponding memory offsets.  */
15891  for (i = 0; i < nops; i++)
15892    {
15893      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15894        return false;
15895
15896      if (i == 0)
15897        base = cur_base;
15898      else if (REGNO (base) != REGNO (cur_base))
15899        return false;
15900
15901      offsets[i] = INTVAL (cur_offset);
15902      if (GET_CODE (operands[i]) == SUBREG)
15903        {
15904          tmp = SUBREG_REG (operands[i]);
15905          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15906          operands[i] = tmp;
15907        }
15908    }
15909
15910  /* Make sure there is no dependency between the individual loads.  */
15911  if (load && REGNO (operands[0]) == REGNO (base))
15912    return false; /* RAW */
15913
15914  if (load && REGNO (operands[0]) == REGNO (operands[1]))
15915    return false; /* WAW */
15916
15917  /* If the same input register is used in both stores
15918     when storing different constants, try to find a free register.
15919     For example, the code
15920        mov r0, 0
15921        str r0, [r2]
15922        mov r0, 1
15923        str r0, [r2, #4]
15924     can be transformed into
15925        mov r1, 0
15926        strd r1, r0, [r2]
15927     in Thumb mode assuming that r1 is free.  */
15928  if (const_store
15929      && REGNO (operands[0]) == REGNO (operands[1])
15930      && INTVAL (operands[4]) != INTVAL (operands[5]))
15931    {
15932    if (TARGET_THUMB2)
15933      {
15934        CLEAR_HARD_REG_SET (regset);
15935        tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15936        if (tmp == NULL_RTX)
15937          return false;
15938
15939        /* Use the new register in the first load to ensure that
15940           if the original input register is not dead after peephole,
15941           then it will have the correct constant value.  */
15942        operands[0] = tmp;
15943      }
    else if (TARGET_ARM)
      {
        /* ??? The early return below disables the ARM-mode handling that
           follows, leaving the rest of this block unreachable.  */
        return false;
15947        int regno = REGNO (operands[0]);
15948        if (!peep2_reg_dead_p (4, operands[0]))
15949          {
15950            /* When the input register is even and is not dead after the
15951               pattern, it has to hold the second constant but we cannot
15952               form a legal STRD in ARM mode with this register as the second
15953               register.  */
15954            if (regno % 2 == 0)
15955              return false;
15956
15957            /* Is regno-1 free? */
15958            SET_HARD_REG_SET (regset);
15959            CLEAR_HARD_REG_BIT(regset, regno - 1);
15960            tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15961            if (tmp == NULL_RTX)
15962              return false;
15963
15964            operands[0] = tmp;
15965          }
15966        else
15967          {
15968            /* Find a DImode register.  */
15969            CLEAR_HARD_REG_SET (regset);
15970            tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15971            if (tmp != NULL_RTX)
15972              {
15973                operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15974                operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15975              }
15976            else
15977              {
15978                /* Can we use the input register to form a DI register?  */
15979                SET_HARD_REG_SET (regset);
15980                CLEAR_HARD_REG_BIT(regset,
15981                                   regno % 2 == 0 ? regno + 1 : regno - 1);
15982                tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15983                if (tmp == NULL_RTX)
15984                  return false;
15985                operands[regno % 2 == 1 ? 0 : 1] = tmp;
15986              }
15987          }
15988
15989        gcc_assert (operands[0] != NULL_RTX);
15990        gcc_assert (operands[1] != NULL_RTX);
15991        gcc_assert (REGNO (operands[0]) % 2 == 0);
15992        gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15993      }
15994    }
15995
15996  /* Make sure the instructions are ordered with lower memory access first.  */
15997  if (offsets[0] > offsets[1])
15998    {
15999      gap = offsets[0] - offsets[1];
16000      offset = offsets[1];
16001
16002      /* Swap the instructions such that lower memory is accessed first.  */
16003      std::swap (operands[0], operands[1]);
16004      std::swap (operands[2], operands[3]);
16005      if (const_store)
16006        std::swap (operands[4], operands[5]);
16007    }
16008  else
16009    {
16010      gap = offsets[1] - offsets[0];
16011      offset = offsets[0];
16012    }
16013
16014  /* Make sure accesses are to consecutive memory locations.  */
16015  if (gap != 4)
16016    return false;
16017
16018  /* Make sure we generate legal instructions.  */
16019  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16020                             false, load))
16021    return true;
16022
  /* In Thumb state, where registers are almost unconstrained, there
     is little hope of fixing it up.  */
16025  if (TARGET_THUMB2)
16026    return false;
16027
16028  if (load && commute)
16029    {
16030      /* Try reordering registers.  */
16031      std::swap (operands[0], operands[1]);
16032      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16033                                 false, load))
16034        return true;
16035    }
16036
16037  if (const_store)
16038    {
16039      /* If input registers are dead after this pattern, they can be
16040         reordered or replaced by other registers that are free in the
16041         current pattern.  */
16042      if (!peep2_reg_dead_p (4, operands[0])
16043          || !peep2_reg_dead_p (4, operands[1]))
16044        return false;
16045
16046      /* Try to reorder the input registers.  */
16047      /* For example, the code
16048           mov r0, 0
16049           mov r1, 1
16050           str r1, [r2]
16051           str r0, [r2, #4]
16052         can be transformed into
16053           mov r1, 0
16054           mov r0, 1
           strd r0, r1, [r2]
16056      */
16057      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16058                                  false, false))
16059        {
16060          std::swap (operands[0], operands[1]);
16061          return true;
16062        }
16063
16064      /* Try to find a free DI register.  */
16065      CLEAR_HARD_REG_SET (regset);
16066      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16067      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16068      while (true)
16069        {
16070          tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16071          if (tmp == NULL_RTX)
16072            return false;
16073
16074          /* DREG must be an even-numbered register in DImode.
16075             Split it into SI registers.  */
16076          operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16077          operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16078          gcc_assert (operands[0] != NULL_RTX);
16079          gcc_assert (operands[1] != NULL_RTX);
16080          gcc_assert (REGNO (operands[0]) % 2 == 0);
16081          gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16082
16083          return (operands_ok_ldrd_strd (operands[0], operands[1],
16084                                         base, offset,
16085                                         false, load));
16086        }
16087    }
16088
16089  return false;
16090}
16091
16092
16093
16094
16095/* Print a symbolic form of X to the debug file, F.  */
16096static void
16097arm_print_value (FILE *f, rtx x)
16098{
16099  switch (GET_CODE (x))
16100    {
16101    case CONST_INT:
16102      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16103      return;
16104
16105    case CONST_DOUBLE:
16106      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16107      return;
16108
16109    case CONST_VECTOR:
16110      {
16111	int i;
16112
16113	fprintf (f, "<");
16114	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16115	  {
16116	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16117	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
16118	      fputc (',', f);
16119	  }
16120	fprintf (f, ">");
16121      }
16122      return;
16123
16124    case CONST_STRING:
16125      fprintf (f, "\"%s\"", XSTR (x, 0));
16126      return;
16127
16128    case SYMBOL_REF:
16129      fprintf (f, "`%s'", XSTR (x, 0));
16130      return;
16131
16132    case LABEL_REF:
16133      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16134      return;
16135
16136    case CONST:
16137      arm_print_value (f, XEXP (x, 0));
16138      return;
16139
16140    case PLUS:
16141      arm_print_value (f, XEXP (x, 0));
16142      fprintf (f, "+");
16143      arm_print_value (f, XEXP (x, 1));
16144      return;
16145
16146    case PC:
16147      fprintf (f, "pc");
16148      return;
16149
16150    default:
16151      fprintf (f, "????");
16152      return;
16153    }
16154}
16155
16156/* Routines for manipulation of the constant pool.  */
16157
16158/* Arm instructions cannot load a large constant directly into a
16159   register; they have to come from a pc relative load.  The constant
16160   must therefore be placed in the addressable range of the pc
16161   relative load.  Depending on the precise pc relative load
16162   instruction the range is somewhere between 256 bytes and 4k.  This
16163   means that we often have to dump a constant inside a function, and
16164   generate code to branch around it.
16165
16166   It is important to minimize this, since the branches will slow
16167   things down and make the code larger.
16168
16169   Normally we can hide the table after an existing unconditional
16170   branch so that there is no interruption of the flow, but in the
16171   worst case the code looks like this:
16172
16173	ldr	rn, L1
16174	...
16175	b	L2
16176	align
16177	L1:	.long value
16178	L2:
16179	...
16180
16181	ldr	rn, L3
16182	...
16183	b	L4
16184	align
16185	L3:	.long value
16186	L4:
16187	...
16188
16189   We fix this by performing a scan after scheduling, which notices
16190   which instructions need to have their operands fetched from the
16191   constant table and builds the table.
16192
16193   The algorithm starts by building a table of all the constants that
16194   need fixing up and all the natural barriers in the function (places
16195   where a constant table can be dropped without breaking the flow).
16196   For each fixup we note how far the pc-relative replacement will be
16197   able to reach and the offset of the instruction into the function.
16198
16199   Having built the table we then group the fixes together to form
16200   tables that are as large as possible (subject to addressing
16201   constraints) and emit each table of constants after the last
16202   barrier that is within range of all the instructions in the group.
16203   If a group does not contain a barrier, then we forcibly create one
16204   by inserting a jump instruction into the flow.  Once the table has
16205   been inserted, the insns are then modified to reference the
16206   relevant entry in the pool.
16207
16208   Possible enhancements to the algorithm (not implemented) are:
16209
16210   1) For some processors and object formats, there may be benefit in
16211   aligning the pools to the start of cache lines; this alignment
16212   would need to be taken into account when calculating addressability
16213   of a pool.  */
16214
16215/* These typedefs are located at the start of this file, so that
16216   they can be used in the prototypes there.  This comment is to
16217   remind readers of that fact so that the following structures
16218   can be understood more easily.
16219
16220     typedef struct minipool_node    Mnode;
16221     typedef struct minipool_fixup   Mfix;  */
16222
16223struct minipool_node
16224{
16225  /* Doubly linked chain of entries.  */
16226  Mnode * next;
16227  Mnode * prev;
16228  /* The maximum offset into the code that this entry can be placed.  While
16229     pushing fixes for forward references, all entries are sorted in order
16230     of increasing max_address.  */
16231  HOST_WIDE_INT max_address;
16232  /* Similarly for an entry inserted for a backwards ref.  */
16233  HOST_WIDE_INT min_address;
16234  /* The number of fixes referencing this entry.  This can become zero
16235     if we "unpush" an entry.  In this case we ignore the entry when we
16236     come to emit the code.  */
16237  int refcount;
16238  /* The offset from the start of the minipool.  */
16239  HOST_WIDE_INT offset;
16240  /* The value in table.  */
16241  rtx value;
16242  /* The mode of value.  */
16243  machine_mode mode;
16244  /* The size of the value.  With iWMMXt enabled
16245     sizes > 4 also imply an alignment of 8-bytes.  */
16246  int fix_size;
16247};
16248
16249struct minipool_fixup
16250{
16251  Mfix *            next;
16252  rtx_insn *        insn;
16253  HOST_WIDE_INT     address;
16254  rtx *             loc;
16255  machine_mode mode;
16256  int               fix_size;
16257  rtx               value;
16258  Mnode *           minipool;
16259  HOST_WIDE_INT     forwards;
16260  HOST_WIDE_INT     backwards;
16261};
16262
16263/* Fixes less than a word need padding out to a word boundary.  */
16264#define MINIPOOL_FIX_SIZE(mode) \
16265  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
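/* For example, a HImode fix (2 bytes) is counted as 4 bytes in the pool,
   while a DImode fix keeps its natural size of 8 bytes.  */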
16266
16267static Mnode *	minipool_vector_head;
16268static Mnode *	minipool_vector_tail;
16269static rtx_code_label	*minipool_vector_label;
16270static int	minipool_pad;
16271
16272/* The linked list of all minipool fixes required for this function.  */
16273Mfix * 		minipool_fix_head;
16274Mfix * 		minipool_fix_tail;
16275/* The fix entry for the current minipool, once it has been placed.  */
16276Mfix *		minipool_barrier;
16277
16278#ifndef JUMP_TABLES_IN_TEXT_SECTION
16279#define JUMP_TABLES_IN_TEXT_SECTION 0
16280#endif
16281
16282static HOST_WIDE_INT
16283get_jump_table_size (rtx_jump_table_data *insn)
16284{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
16287  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16288    {
16289      rtx body = PATTERN (insn);
16290      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16291      HOST_WIDE_INT size;
16292      HOST_WIDE_INT modesize;
16293
16294      modesize = GET_MODE_SIZE (GET_MODE (body));
16295      size = modesize * XVECLEN (body, elt);
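      /* For instance, a five-entry TBB (QImode) table is 5 bytes, rounded
	 up to 6 below; a five-entry TBH (HImode) table occupies 10 bytes
	 and needs no padding.  */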
16296      switch (modesize)
16297	{
16298	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
16300	  size = (size + 1) & ~(HOST_WIDE_INT)1;
16301	  break;
16302	case 2:
16303	  /* No padding necessary for TBH.  */
16304	  break;
16305	case 4:
16306	  /* Add two bytes for alignment on Thumb.  */
16307	  if (TARGET_THUMB)
16308	    size += 2;
16309	  break;
16310	default:
16311	  gcc_unreachable ();
16312	}
16313      return size;
16314    }
16315
16316  return 0;
16317}
16318
16319/* Return the maximum amount of padding that will be inserted before
16320   label LABEL.  */
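/* For instance, a label aligned to an 8-byte boundary can attract up to
   6 bytes of padding in Thumb state (where the minimum insn size is 2)
   and up to 4 bytes in ARM state.  */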
16321
16322static HOST_WIDE_INT
16323get_label_padding (rtx label)
16324{
16325  HOST_WIDE_INT align, min_insn_size;
16326
16327  align = 1 << label_to_alignment (label);
16328  min_insn_size = TARGET_THUMB ? 2 : 4;
16329  return align > min_insn_size ? align - min_insn_size : 0;
16330}
16331
16332/* Move a minipool fix MP from its current location to before MAX_MP.
16333   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16334   constraints may need updating.  */
16335static Mnode *
16336move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16337			       HOST_WIDE_INT max_address)
16338{
16339  /* The code below assumes these are different.  */
16340  gcc_assert (mp != max_mp);
16341
16342  if (max_mp == NULL)
16343    {
16344      if (max_address < mp->max_address)
16345	mp->max_address = max_address;
16346    }
16347  else
16348    {
16349      if (max_address > max_mp->max_address - mp->fix_size)
16350	mp->max_address = max_mp->max_address - mp->fix_size;
16351      else
16352	mp->max_address = max_address;
16353
16354      /* Unlink MP from its current position.  Since max_mp is non-null,
16355       mp->prev must be non-null.  */
16356      mp->prev->next = mp->next;
16357      if (mp->next != NULL)
16358	mp->next->prev = mp->prev;
16359      else
16360	minipool_vector_tail = mp->prev;
16361
16362      /* Re-insert it before MAX_MP.  */
16363      mp->next = max_mp;
16364      mp->prev = max_mp->prev;
16365      max_mp->prev = mp;
16366
16367      if (mp->prev != NULL)
16368	mp->prev->next = mp;
16369      else
16370	minipool_vector_head = mp;
16371    }
16372
16373  /* Save the new entry.  */
16374  max_mp = mp;
16375
16376  /* Scan over the preceding entries and adjust their addresses as
16377     required.  */
16378  while (mp->prev != NULL
16379	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16380    {
16381      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16382      mp = mp->prev;
16383    }
16384
16385  return max_mp;
16386}
16387
16388/* Add a constant to the minipool for a forward reference.  Returns the
16389   node added or NULL if the constant will not fit in this pool.  */
16390static Mnode *
16391add_minipool_forward_ref (Mfix *fix)
16392{
16393  /* If set, max_mp is the first pool_entry that has a lower
16394     constraint than the one we are trying to add.  */
16395  Mnode *       max_mp = NULL;
16396  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16397  Mnode *       mp;
16398
16399  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
16401     new constant pool entry may cause the pool to start FIX_SIZE bytes
16402     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
	  >= minipool_vector_head->max_address - fix->fix_size))
16406    return NULL;
16407
16408  /* Scan the pool to see if a constant with the same value has
16409     already been added.  While we are doing this, also note the
16410     location where we must insert the constant if it doesn't already
16411     exist.  */
16412  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16413    {
16414      if (GET_CODE (fix->value) == GET_CODE (mp->value)
16415	  && fix->mode == mp->mode
16416	  && (!LABEL_P (fix->value)
16417	      || (CODE_LABEL_NUMBER (fix->value)
16418		  == CODE_LABEL_NUMBER (mp->value)))
16419	  && rtx_equal_p (fix->value, mp->value))
16420	{
16421	  /* More than one fix references this entry.  */
16422	  mp->refcount++;
16423	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16424	}
16425
16426      /* Note the insertion point if necessary.  */
16427      if (max_mp == NULL
16428	  && mp->max_address > max_address)
16429	max_mp = mp;
16430
      /* If we are inserting an 8-byte aligned quantity and
16432	 we have not already found an insertion point, then
16433	 make sure that all such 8-byte aligned quantities are
16434	 placed at the start of the pool.  */
16435      if (ARM_DOUBLEWORD_ALIGN
16436	  && max_mp == NULL
16437	  && fix->fix_size >= 8
16438	  && mp->fix_size < 8)
16439	{
16440	  max_mp = mp;
16441	  max_address = mp->max_address;
16442	}
16443    }
16444
16445  /* The value is not currently in the minipool, so we need to create
16446     a new entry for it.  If MAX_MP is NULL, the entry will be put on
16447     the end of the list since the placement is less constrained than
16448     any existing entry.  Otherwise, we insert the new fix before
16449     MAX_MP and, if necessary, adjust the constraints on the other
16450     entries.  */
16451  mp = XNEW (Mnode);
16452  mp->fix_size = fix->fix_size;
16453  mp->mode = fix->mode;
16454  mp->value = fix->value;
16455  mp->refcount = 1;
16456  /* Not yet required for a backwards ref.  */
16457  mp->min_address = -65536;
16458
16459  if (max_mp == NULL)
16460    {
16461      mp->max_address = max_address;
16462      mp->next = NULL;
16463      mp->prev = minipool_vector_tail;
16464
16465      if (mp->prev == NULL)
16466	{
16467	  minipool_vector_head = mp;
16468	  minipool_vector_label = gen_label_rtx ();
16469	}
16470      else
16471	mp->prev->next = mp;
16472
16473      minipool_vector_tail = mp;
16474    }
16475  else
16476    {
16477      if (max_address > max_mp->max_address - mp->fix_size)
16478	mp->max_address = max_mp->max_address - mp->fix_size;
16479      else
16480	mp->max_address = max_address;
16481
16482      mp->next = max_mp;
16483      mp->prev = max_mp->prev;
16484      max_mp->prev = mp;
16485      if (mp->prev != NULL)
16486	mp->prev->next = mp;
16487      else
16488	minipool_vector_head = mp;
16489    }
16490
16491  /* Save the new entry.  */
16492  max_mp = mp;
16493
16494  /* Scan over the preceding entries and adjust their addresses as
16495     required.  */
16496  while (mp->prev != NULL
16497	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16498    {
16499      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16500      mp = mp->prev;
16501    }
16502
16503  return max_mp;
16504}
16505
16506static Mnode *
16507move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16508				HOST_WIDE_INT  min_address)
16509{
16510  HOST_WIDE_INT offset;
16511
16512  /* The code below assumes these are different.  */
16513  gcc_assert (mp != min_mp);
16514
16515  if (min_mp == NULL)
16516    {
16517      if (min_address > mp->min_address)
16518	mp->min_address = min_address;
16519    }
16520  else
16521    {
16522      /* We will adjust this below if it is too loose.  */
16523      mp->min_address = min_address;
16524
16525      /* Unlink MP from its current position.  Since min_mp is non-null,
16526	 mp->next must be non-null.  */
16527      mp->next->prev = mp->prev;
16528      if (mp->prev != NULL)
16529	mp->prev->next = mp->next;
16530      else
16531	minipool_vector_head = mp->next;
16532
16533      /* Reinsert it after MIN_MP.  */
16534      mp->prev = min_mp;
16535      mp->next = min_mp->next;
16536      min_mp->next = mp;
16537      if (mp->next != NULL)
16538	mp->next->prev = mp;
16539      else
16540	minipool_vector_tail = mp;
16541    }
16542
16543  min_mp = mp;
16544
16545  offset = 0;
16546  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16547    {
16548      mp->offset = offset;
16549      if (mp->refcount > 0)
16550	offset += mp->fix_size;
16551
16552      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16553	mp->next->min_address = mp->min_address + mp->fix_size;
16554    }
16555
16556  return min_mp;
16557}
16558
16559/* Add a constant to the minipool for a backward reference.  Returns the
16560   node added or NULL if the constant will not fit in this pool.
16561
16562   Note that the code for insertion for a backwards reference can be
16563   somewhat confusing because the calculated offsets for each fix do
16564   not take into account the size of the pool (which is still under
   construction).  */
16566static Mnode *
16567add_minipool_backward_ref (Mfix *fix)
16568{
16569  /* If set, min_mp is the last pool_entry that has a lower constraint
16570     than the one we are trying to add.  */
16571  Mnode *min_mp = NULL;
16572  /* This can be negative, since it is only a constraint.  */
16573  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
16574  Mnode *mp;
16575
16576  /* If we can't reach the current pool from this insn, or if we can't
16577     insert this entry at the end of the pool without pushing other
16578     fixes out of range, then we don't try.  This ensures that we
16579     can't fail later on.  */
16580  if (min_address >= minipool_barrier->address
16581      || (minipool_vector_tail->min_address + fix->fix_size
16582	  >= minipool_barrier->address))
16583    return NULL;
16584
16585  /* Scan the pool to see if a constant with the same value has
16586     already been added.  While we are doing this, also note the
16587     location where we must insert the constant if it doesn't already
16588     exist.  */
16589  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16590    {
16591      if (GET_CODE (fix->value) == GET_CODE (mp->value)
16592	  && fix->mode == mp->mode
16593	  && (!LABEL_P (fix->value)
16594	      || (CODE_LABEL_NUMBER (fix->value)
16595		  == CODE_LABEL_NUMBER (mp->value)))
16596	  && rtx_equal_p (fix->value, mp->value)
16597	  /* Check that there is enough slack to move this entry to the
16598	     end of the table (this is conservative).  */
16599	  && (mp->max_address
16600	      > (minipool_barrier->address
16601		 + minipool_vector_tail->offset
16602		 + minipool_vector_tail->fix_size)))
16603	{
16604	  mp->refcount++;
16605	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16606	}
16607
16608      if (min_mp != NULL)
16609	mp->min_address += fix->fix_size;
16610      else
16611	{
16612	  /* Note the insertion point if necessary.  */
16613	  if (mp->min_address < min_address)
16614	    {
	      /* For now, we do not allow nodes that require 8-byte
		 alignment to be inserted anywhere but at the start of the
		 pool.  */
16617	      if (ARM_DOUBLEWORD_ALIGN
16618		  && fix->fix_size >= 8 && mp->fix_size < 8)
16619		return NULL;
16620	      else
16621		min_mp = mp;
16622	    }
16623	  else if (mp->max_address
16624		   < minipool_barrier->address + mp->offset + fix->fix_size)
16625	    {
16626	      /* Inserting before this entry would push the fix beyond
16627		 its maximum address (which can happen if we have
16628		 re-located a forwards fix); force the new fix to come
16629		 after it.  */
16630	      if (ARM_DOUBLEWORD_ALIGN
16631		  && fix->fix_size >= 8 && mp->fix_size < 8)
16632		return NULL;
16633	      else
16634		{
16635		  min_mp = mp;
16636		  min_address = mp->min_address + fix->fix_size;
16637		}
16638	    }
16639	  /* Do not insert a non-8-byte aligned quantity before 8-byte
16640	     aligned quantities.  */
16641	  else if (ARM_DOUBLEWORD_ALIGN
16642		   && fix->fix_size < 8
16643		   && mp->fix_size >= 8)
16644	    {
16645	      min_mp = mp;
16646	      min_address = mp->min_address + fix->fix_size;
16647	    }
16648	}
16649    }
16650
16651  /* We need to create a new entry.  */
16652  mp = XNEW (Mnode);
16653  mp->fix_size = fix->fix_size;
16654  mp->mode = fix->mode;
16655  mp->value = fix->value;
16656  mp->refcount = 1;
16657  mp->max_address = minipool_barrier->address + 65536;
16658
16659  mp->min_address = min_address;
16660
16661  if (min_mp == NULL)
16662    {
16663      mp->prev = NULL;
16664      mp->next = minipool_vector_head;
16665
16666      if (mp->next == NULL)
16667	{
16668	  minipool_vector_tail = mp;
16669	  minipool_vector_label = gen_label_rtx ();
16670	}
16671      else
16672	mp->next->prev = mp;
16673
16674      minipool_vector_head = mp;
16675    }
16676  else
16677    {
16678      mp->next = min_mp->next;
16679      mp->prev = min_mp;
16680      min_mp->next = mp;
16681
16682      if (mp->next != NULL)
16683	mp->next->prev = mp;
16684      else
16685	minipool_vector_tail = mp;
16686    }
16687
16688  /* Save the new entry.  */
16689  min_mp = mp;
16690
16691  if (mp->prev)
16692    mp = mp->prev;
16693  else
16694    mp->offset = 0;
16695
16696  /* Scan over the following entries and adjust their offsets.  */
16697  while (mp->next != NULL)
16698    {
16699      if (mp->next->min_address < mp->min_address + mp->fix_size)
16700	mp->next->min_address = mp->min_address + mp->fix_size;
16701
16702      if (mp->refcount)
16703	mp->next->offset = mp->offset + mp->fix_size;
16704      else
16705	mp->next->offset = mp->offset;
16706
16707      mp = mp->next;
16708    }
16709
16710  return min_mp;
16711}
16712
16713static void
16714assign_minipool_offsets (Mfix *barrier)
16715{
16716  HOST_WIDE_INT offset = 0;
16717  Mnode *mp;
16718
16719  minipool_barrier = barrier;
16720
16721  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16722    {
16723      mp->offset = offset;
16724
16725      if (mp->refcount > 0)
16726	offset += mp->fix_size;
16727    }
16728}
16729
16730/* Output the literal table */
16731static void
16732dump_minipool (rtx_insn *scan)
16733{
16734  Mnode * mp;
16735  Mnode * nmp;
16736  int align64 = 0;
16737
16738  if (ARM_DOUBLEWORD_ALIGN)
16739    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16740      if (mp->refcount > 0 && mp->fix_size >= 8)
16741	{
16742	  align64 = 1;
16743	  break;
16744	}
16745
16746  if (dump_file)
16747    fprintf (dump_file,
16748	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16749	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16750
16751  scan = emit_label_after (gen_label_rtx (), scan);
16752  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16753  scan = emit_label_after (minipool_vector_label, scan);
16754
16755  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16756    {
16757      if (mp->refcount > 0)
16758	{
16759	  if (dump_file)
16760	    {
16761	      fprintf (dump_file,
16762		       ";;  Offset %u, min %ld, max %ld ",
16763		       (unsigned) mp->offset, (unsigned long) mp->min_address,
16764		       (unsigned long) mp->max_address);
16765	      arm_print_value (dump_file, mp->value);
16766	      fputc ('\n', dump_file);
16767	    }
16768
16769	  switch (GET_MODE_SIZE (mp->mode))
16770	    {
16771#ifdef HAVE_consttable_1
16772	    case 1:
16773	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16774	      break;
16775
16776#endif
16777#ifdef HAVE_consttable_2
16778	    case 2:
16779	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16780	      break;
16781
16782#endif
16783#ifdef HAVE_consttable_4
16784	    case 4:
16785	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16786	      break;
16787
16788#endif
16789#ifdef HAVE_consttable_8
16790	    case 8:
16791	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16792	      break;
16793
16794#endif
16795#ifdef HAVE_consttable_16
16796	    case 16:
16797              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16798              break;
16799
16800#endif
16801	    default:
16802	      gcc_unreachable ();
16803	    }
16804	}
16805
16806      nmp = mp->next;
16807      free (mp);
16808    }
16809
16810  minipool_vector_head = minipool_vector_tail = NULL;
16811  scan = emit_insn_after (gen_consttable_end (), scan);
16812  scan = emit_barrier_after (scan);
16813}
16814
16815/* Return the cost of forcibly inserting a barrier after INSN.  */
16816static int
16817arm_barrier_cost (rtx insn)
16818{
16819  /* Basing the location of the pool on the loop depth is preferable,
16820     but at the moment, the basic block information seems to be
16821     corrupted by this stage of the compilation.  */
16822  int base_cost = 50;
16823  rtx next = next_nonnote_insn (insn);
16824
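  /* Favour placing the barrier just before an existing label; inserting
     the pool there is less disruptive, so reduce the cost.  */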
16825  if (next != NULL && LABEL_P (next))
16826    base_cost -= 20;
16827
16828  switch (GET_CODE (insn))
16829    {
16830    case CODE_LABEL:
16831      /* It will always be better to place the table before the label, rather
16832	 than after it.  */
16833      return 50;
16834
16835    case INSN:
16836    case CALL_INSN:
16837      return base_cost;
16838
16839    case JUMP_INSN:
16840      return base_cost - 10;
16841
16842    default:
16843      return base_cost + 10;
16844    }
16845}
16846
16847/* Find the best place in the insn stream in the range
16848   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16849   Create the barrier by inserting a jump and add a new fix entry for
16850   it.  */
16851static Mfix *
16852create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16853{
16854  HOST_WIDE_INT count = 0;
16855  rtx_barrier *barrier;
16856  rtx_insn *from = fix->insn;
16857  /* The instruction after which we will insert the jump.  */
16858  rtx_insn *selected = NULL;
16859  int selected_cost;
16860  /* The address at which the jump instruction will be placed.  */
16861  HOST_WIDE_INT selected_address;
16862  Mfix * new_fix;
16863  HOST_WIDE_INT max_count = max_address - fix->address;
16864  rtx_code_label *label = gen_label_rtx ();
16865
16866  selected_cost = arm_barrier_cost (from);
16867  selected_address = fix->address;
16868
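  /* Walk forwards from the fix, accumulating instruction lengths, and
     remember the cheapest place seen so far that is still within range.  */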
16869  while (from && count < max_count)
16870    {
16871      rtx_jump_table_data *tmp;
16872      int new_cost;
16873
16874      /* This code shouldn't have been called if there was a natural barrier
16875	 within range.  */
16876      gcc_assert (!BARRIER_P (from));
16877
16878      /* Count the length of this insn.  This must stay in sync with the
16879	 code that pushes minipool fixes.  */
16880      if (LABEL_P (from))
16881	count += get_label_padding (from);
16882      else
16883	count += get_attr_length (from);
16884
16885      /* If there is a jump table, add its length.  */
16886      if (tablejump_p (from, NULL, &tmp))
16887	{
16888	  count += get_jump_table_size (tmp);
16889
16890	  /* Jump tables aren't in a basic block, so base the cost on
16891	     the dispatch insn.  If we select this location, we will
16892	     still put the pool after the table.  */
16893	  new_cost = arm_barrier_cost (from);
16894
16895	  if (count < max_count
16896	      && (!selected || new_cost <= selected_cost))
16897	    {
16898	      selected = tmp;
16899	      selected_cost = new_cost;
16900	      selected_address = fix->address + count;
16901	    }
16902
16903	  /* Continue after the dispatch table.  */
16904	  from = NEXT_INSN (tmp);
16905	  continue;
16906	}
16907
16908      new_cost = arm_barrier_cost (from);
16909
16910      if (count < max_count
16911	  && (!selected || new_cost <= selected_cost))
16912	{
16913	  selected = from;
16914	  selected_cost = new_cost;
16915	  selected_address = fix->address + count;
16916	}
16917
16918      from = NEXT_INSN (from);
16919    }
16920
16921  /* Make sure that we found a place to insert the jump.  */
16922  gcc_assert (selected);
16923
16924  /* Make sure we do not split a call and its corresponding
16925     CALL_ARG_LOCATION note.  */
16926  if (CALL_P (selected))
16927    {
16928      rtx_insn *next = NEXT_INSN (selected);
16929      if (next && NOTE_P (next)
16930	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16931	  selected = next;
16932    }
16933
16934  /* Create a new JUMP_INSN that branches around a barrier.  */
16935  from = emit_jump_insn_after (gen_jump (label), selected);
16936  JUMP_LABEL (from) = label;
16937  barrier = emit_barrier_after (from);
16938  emit_label_after (label, barrier);
16939
16940  /* Create a minipool barrier entry for the new barrier.  */
16941  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16942  new_fix->insn = barrier;
16943  new_fix->address = selected_address;
16944  new_fix->next = fix->next;
16945  fix->next = new_fix;
16946
16947  return new_fix;
16948}
16949
16950/* Record that there is a natural barrier in the insn stream at
16951   ADDRESS.  */
16952static void
16953push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16954{
16955  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16956
16957  fix->insn = insn;
16958  fix->address = address;
16959
16960  fix->next = NULL;
16961  if (minipool_fix_head != NULL)
16962    minipool_fix_tail->next = fix;
16963  else
16964    minipool_fix_head = fix;
16965
16966  minipool_fix_tail = fix;
16967}
16968
16969/* Record INSN, which will need fixing up to load a value from the
16970   minipool.  ADDRESS is the offset of the insn since the start of the
16971   function; LOC is a pointer to the part of the insn which requires
16972   fixing; VALUE is the constant that must be loaded, which is of type
16973   MODE.  */
16974static void
16975push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16976		   machine_mode mode, rtx value)
16977{
16978  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16979
16980  fix->insn = insn;
16981  fix->address = address;
16982  fix->loc = loc;
16983  fix->mode = mode;
16984  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16985  fix->value = value;
16986  fix->forwards = get_attr_pool_range (insn);
16987  fix->backwards = get_attr_neg_pool_range (insn);
16988  fix->minipool = NULL;
16989
16990  /* If an insn doesn't have a range defined for it, then it isn't
16991     expecting to be reworked by this code.  Better to stop now than
16992     to generate duff assembly code.  */
16993  gcc_assert (fix->forwards || fix->backwards);
16994
16995  /* If an entry requires 8-byte alignment then assume all constant pools
16996     require 4 bytes of padding.  Trying to do this later on a per-pool
16997     basis is awkward because existing pool entries have to be modified.  */
16998  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16999    minipool_pad = 4;
17000
17001  if (dump_file)
17002    {
17003      fprintf (dump_file,
17004	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17005	       GET_MODE_NAME (mode),
17006	       INSN_UID (insn), (unsigned long) address,
17007	       -1 * (long)fix->backwards, (long)fix->forwards);
17008      arm_print_value (dump_file, fix->value);
17009      fprintf (dump_file, "\n");
17010    }
17011
17012  /* Add it to the chain of fixes.  */
17013  fix->next = NULL;
17014
17015  if (minipool_fix_head != NULL)
17016    minipool_fix_tail->next = fix;
17017  else
17018    minipool_fix_head = fix;
17019
17020  minipool_fix_tail = fix;
17021}
17022
17023/* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
17024   expressed as a number of insns, or 99 if we always want to synthesize
17025   the value (and so avoid using the literal pool).  */
17026int
17027arm_max_const_double_inline_cost ()
17028{
17029  /* Let the value get synthesized to avoid the use of literal pools.  */
17030  if (arm_disable_literal_pool)
17031    return 99;
17032
17033  return ((optimize_size || arm_ld_sched) ? 3 : 4);
17034}
17035
17036/* Return the cost of synthesizing a 64-bit constant VAL inline.
17037   Returns the number of insns needed, or 99 if we don't know how to
17038   do it.  */
17039int
17040arm_const_double_inline_cost (rtx val)
17041{
17042  rtx lowpart, highpart;
17043  machine_mode mode;
17044
17045  mode = GET_MODE (val);
17046
17047  if (mode == VOIDmode)
17048    mode = DImode;
17049
17050  gcc_assert (GET_MODE_SIZE (mode) == 8);
17051
17052  lowpart = gen_lowpart (SImode, val);
17053  highpart = gen_highpart_mode (SImode, mode, val);
17054
17055  gcc_assert (CONST_INT_P (lowpart));
17056  gcc_assert (CONST_INT_P (highpart));
17057
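  /* Cost each 32-bit half independently; the whole constant is synthesized
     as two separate SImode immediates.  */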
17058  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17059			    NULL_RTX, NULL_RTX, 0, 0)
17060	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17061			      NULL_RTX, NULL_RTX, 0, 0));
17062}
17063
17064/* Return the cost of loading an SImode constant.  */
17065static inline int
17066arm_const_inline_cost (enum rtx_code code, rtx val)
17067{
17068  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17069                           NULL_RTX, NULL_RTX, 1, 0);
17070}
17071
17072/* Return true if it is worthwhile to split a 64-bit constant into two
17073   32-bit operations.  This is the case if optimizing for size, or
17074   if we have load delay slots, or if one 32-bit part can be done with
17075   a single data operation.  */
17076bool
17077arm_const_double_by_parts (rtx val)
17078{
17079  machine_mode mode = GET_MODE (val);
17080  rtx part;
17081
17082  if (optimize_size || arm_ld_sched)
17083    return true;
17084
17085  if (mode == VOIDmode)
17086    mode = DImode;
17087
17088  part = gen_highpart_mode (SImode, mode, val);
17089
17090  gcc_assert (CONST_INT_P (part));
17091
17092  if (const_ok_for_arm (INTVAL (part))
17093      || const_ok_for_arm (~INTVAL (part)))
17094    return true;
17095
17096  part = gen_lowpart (SImode, val);
17097
17098  gcc_assert (CONST_INT_P (part));
17099
17100  if (const_ok_for_arm (INTVAL (part))
17101      || const_ok_for_arm (~INTVAL (part)))
17102    return true;
17103
17104  return false;
17105}
17106
17107/* Return true if it is possible to inline both the high and low parts
17108   of a 64-bit constant into 32-bit data processing instructions.  */
17109bool
17110arm_const_double_by_immediates (rtx val)
17111{
17112  machine_mode mode = GET_MODE (val);
17113  rtx part;
17114
17115  if (mode == VOIDmode)
17116    mode = DImode;
17117
17118  part = gen_highpart_mode (SImode, mode, val);
17119
17120  gcc_assert (CONST_INT_P (part));
17121
17122  if (!const_ok_for_arm (INTVAL (part)))
17123    return false;
17124
17125  part = gen_lowpart (SImode, val);
17126
17127  gcc_assert (CONST_INT_P (part));
17128
17129  if (!const_ok_for_arm (INTVAL (part)))
17130    return false;
17131
17132  return true;
17133}
17134
17135/* Scan INSN and note any of its operands that need fixing.
17136   If DO_PUSHES is false, we do not actually push any of the fixups
17137   needed.  */
17138static void
17139note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17140{
17141  int opno;
17142
17143  extract_constrain_insn (insn);
17144
17145  if (recog_data.n_alternatives == 0)
17146    return;
17147
17148  /* Fill in recog_op_alt with information about the constraints of
17149     this insn.  */
17150  preprocess_constraints (insn);
17151
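  /* which_op_alt gives the operand constraints for the alternative chosen
     by extract_constrain_insn above.  */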
17152  const operand_alternative *op_alt = which_op_alt ();
17153  for (opno = 0; opno < recog_data.n_operands; opno++)
17154    {
17155      /* Things we need to fix can only occur in inputs.  */
17156      if (recog_data.operand_type[opno] != OP_IN)
17157	continue;
17158
17159      /* If this alternative is a memory reference, then any mention
17160	 of constants in this alternative is really to fool reload
17161	 into allowing us to accept one there.  We need to fix them up
17162	 now so that we output the right code.  */
17163      if (op_alt[opno].memory_ok)
17164	{
17165	  rtx op = recog_data.operand[opno];
17166
17167	  if (CONSTANT_P (op))
17168	    {
17169	      if (do_pushes)
17170		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17171				   recog_data.operand_mode[opno], op);
17172	    }
17173	  else if (MEM_P (op)
17174		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17175		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17176	    {
17177	      if (do_pushes)
17178		{
17179		  rtx cop = avoid_constant_pool_reference (op);
17180
17181		  /* Casting the address of something to a mode narrower
17182		     than a word can cause avoid_constant_pool_reference()
17183		     to return the pool reference itself.  That's no good to
17184		     us here.  Let's just hope that we can use the
17185		     constant pool value directly.  */
17186		  if (op == cop)
17187		    cop = get_pool_constant (XEXP (op, 0));
17188
17189		  push_minipool_fix (insn, address,
17190				     recog_data.operand_loc[opno],
17191				     recog_data.operand_mode[opno], cop);
17192		}
17193
17194	    }
17195	}
17196    }
17197
17198  return;
17199}
17200
17201/* Rewrite a move insn into a subtract of 0 if the condition codes will
17202   be useful in the next conditional jump insn.  */
17203
17204static void
17205thumb1_reorg (void)
17206{
17207  basic_block bb;
17208
17209  FOR_EACH_BB_FN (bb, cfun)
17210    {
17211      rtx dest, src;
17212      rtx cmp, op0, op1, set = NULL;
17213      rtx_insn *prev, *insn = BB_END (bb);
17214      bool insn_clobbered = false;
17215
17216      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17217	insn = PREV_INSN (insn);
17218
17219      /* Find the last cbranchsi4_insn in basic block BB.  */
17220      if (insn == BB_HEAD (bb)
17221	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17222	continue;
17223
17224      /* Get the register with which we are comparing.  */
17225      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17226      op0 = XEXP (cmp, 0);
17227      op1 = XEXP (cmp, 1);
17228
17229      /* Check that comparison is against ZERO.  */
17230      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17231	continue;
17232
17233      /* Find the first flag setting insn before INSN in basic block BB.  */
17234      gcc_assert (insn != BB_HEAD (bb));
17235      for (prev = PREV_INSN (insn);
17236	   (!insn_clobbered
17237	    && prev != BB_HEAD (bb)
17238	    && (NOTE_P (prev)
17239		|| DEBUG_INSN_P (prev)
17240		|| ((set = single_set (prev)) != NULL
17241		    && get_attr_conds (prev) == CONDS_NOCOND)));
17242	   prev = PREV_INSN (prev))
17243	{
17244	  if (reg_set_p (op0, prev))
17245	    insn_clobbered = true;
17246	}
17247
17248      /* Skip if OP0 is clobbered by an insn other than PREV.  */
17249      if (insn_clobbered)
17250	continue;
17251
17252      if (!set)
17253	continue;
17254
17255      dest = SET_DEST (set);
17256      src = SET_SRC (set);
17257      if (!low_register_operand (dest, SImode)
17258	  || !low_register_operand (src, SImode))
17259	continue;
17260
17261      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17262	 in INSN.  Both src and dest of the move insn are checked.  */
17263      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17264	{
17265	  dest = copy_rtx (dest);
17266	  src = copy_rtx (src);
17267	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
17268	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17269	  INSN_CODE (prev) = -1;
17270	  /* Set test register in INSN to dest.  */
17271	  XEXP (cmp, 0) = copy_rtx (dest);
17272	  INSN_CODE (insn) = -1;
17273	}
17274    }
17275}
17276
17277/* Convert instructions to their cc-clobbering variant if possible, since
17278   that allows us to use smaller encodings.  */
17279
17280static void
17281thumb2_reorg (void)
17282{
17283  basic_block bb;
17284  regset_head live;
17285
17286  INIT_REG_SET (&live);
17287
17288  /* We are freeing block_for_insn in the toplev to keep compatibility
17289     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
17290  compute_bb_for_insn ();
17291  df_analyze ();
17292
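  /* SKIP: leave the insn alone.  CONV: add a clobber of CC so that the
     flag-setting 16-bit encoding can be used.  SWAP_CONV: as CONV, but the
     operands of the (commutative) operation must be swapped first.  */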
17293  enum Convert_Action {SKIP, CONV, SWAP_CONV};
17294
17295  FOR_EACH_BB_FN (bb, cfun)
17296    {
17297      if (current_tune->disparage_flag_setting_t16_encodings
17298	  && optimize_bb_for_speed_p (bb))
17299	continue;
17300
17301      rtx_insn *insn;
17302      Convert_Action action = SKIP;
17303      Convert_Action action_for_partial_flag_setting
17304	= (current_tune->disparage_partial_flag_setting_t16_encodings
17305	   && optimize_bb_for_speed_p (bb))
17306	  ? SKIP : CONV;
17307
17308      COPY_REG_SET (&live, DF_LR_OUT (bb));
17309      df_simulate_initialize_backwards (bb, &live);
17310      FOR_BB_INSNS_REVERSE (bb, insn)
17311	{
17312	  if (NONJUMP_INSN_P (insn)
17313	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
17314	      && GET_CODE (PATTERN (insn)) == SET)
17315	    {
17316	      action = SKIP;
17317	      rtx pat = PATTERN (insn);
17318	      rtx dst = XEXP (pat, 0);
17319	      rtx src = XEXP (pat, 1);
17320	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
17321
17322	      if (UNARY_P (src) || BINARY_P (src))
17323		  op0 = XEXP (src, 0);
17324
17325	      if (BINARY_P (src))
17326		  op1 = XEXP (src, 1);
17327
17328	      if (low_register_operand (dst, SImode))
17329		{
17330		  switch (GET_CODE (src))
17331		    {
17332		    case PLUS:
17333		      /* Adding two registers and storing the result
17334			 in the first source is already a 16-bit
17335			 operation.  */
17336		      if (rtx_equal_p (dst, op0)
17337			  && register_operand (op1, SImode))
17338			break;
17339
17340		      if (low_register_operand (op0, SImode))
17341			{
17342			  /* ADDS <Rd>,<Rn>,<Rm>  */
17343			  if (low_register_operand (op1, SImode))
17344			    action = CONV;
17345			  /* ADDS <Rdn>,#<imm8>  */
17346			  /* SUBS <Rdn>,#<imm8>  */
17347			  else if (rtx_equal_p (dst, op0)
17348				   && CONST_INT_P (op1)
17349				   && IN_RANGE (INTVAL (op1), -255, 255))
17350			    action = CONV;
17351			  /* ADDS <Rd>,<Rn>,#<imm3>  */
17352			  /* SUBS <Rd>,<Rn>,#<imm3>  */
17353			  else if (CONST_INT_P (op1)
17354				   && IN_RANGE (INTVAL (op1), -7, 7))
17355			    action = CONV;
17356			}
17357		      /* ADCS <Rd>, <Rn>  */
17358		      else if (GET_CODE (XEXP (src, 0)) == PLUS
17359			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17360			      && low_register_operand (XEXP (XEXP (src, 0), 1),
17361						       SImode)
17362			      && COMPARISON_P (op1)
17363			      && cc_register (XEXP (op1, 0), VOIDmode)
17364			      && maybe_get_arm_condition_code (op1) == ARM_CS
17365			      && XEXP (op1, 1) == const0_rtx)
17366		        action = CONV;
17367		      break;
17368
17369		    case MINUS:
17370		      /* RSBS <Rd>,<Rn>,#0
17371			 Not handled here: see NEG below.  */
17372		      /* SUBS <Rd>,<Rn>,#<imm3>
17373			 SUBS <Rdn>,#<imm8>
17374			 Not handled here: see PLUS above.  */
17375		      /* SUBS <Rd>,<Rn>,<Rm>  */
17376		      if (low_register_operand (op0, SImode)
17377			  && low_register_operand (op1, SImode))
17378			    action = CONV;
17379		      break;
17380
17381		    case MULT:
17382		      /* MULS <Rdm>,<Rn>,<Rdm>
17383			 As an exception to the rule, this is only used
17384			 when optimizing for size since MULS is slow on all
17385			 known implementations.  We do not even want to use
17386			 MULS in cold code, if optimizing for speed, so we
17387			 test the global flag here.  */
17388		      if (!optimize_size)
17389			break;
17390		      /* else fall through.  */
17391		    case AND:
17392		    case IOR:
17393		    case XOR:
17394		      /* ANDS <Rdn>,<Rm>  */
17395		      if (rtx_equal_p (dst, op0)
17396			  && low_register_operand (op1, SImode))
17397			action = action_for_partial_flag_setting;
17398		      else if (rtx_equal_p (dst, op1)
17399			       && low_register_operand (op0, SImode))
17400			action = action_for_partial_flag_setting == SKIP
17401				 ? SKIP : SWAP_CONV;
17402		      break;
17403
17404		    case ASHIFTRT:
17405		    case ASHIFT:
17406		    case LSHIFTRT:
17407		      /* ASRS <Rdn>,<Rm> */
17408		      /* LSRS <Rdn>,<Rm> */
17409		      /* LSLS <Rdn>,<Rm> */
17410		      if (rtx_equal_p (dst, op0)
17411			  && low_register_operand (op1, SImode))
17412			action = action_for_partial_flag_setting;
17413		      /* ASRS <Rd>,<Rm>,#<imm5> */
17414		      /* LSRS <Rd>,<Rm>,#<imm5> */
17415		      /* LSLS <Rd>,<Rm>,#<imm5> */
17416		      else if (low_register_operand (op0, SImode)
17417			       && CONST_INT_P (op1)
17418			       && IN_RANGE (INTVAL (op1), 0, 31))
17419			action = action_for_partial_flag_setting;
17420		      break;
17421
17422		    case ROTATERT:
17423		      /* RORS <Rdn>,<Rm>  */
17424		      if (rtx_equal_p (dst, op0)
17425			  && low_register_operand (op1, SImode))
17426			action = action_for_partial_flag_setting;
17427		      break;
17428
17429		    case NOT:
17430		      /* MVNS <Rd>,<Rm>  */
17431		      if (low_register_operand (op0, SImode))
17432			action = action_for_partial_flag_setting;
17433		      break;
17434
17435		    case NEG:
17436		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
17437		      if (low_register_operand (op0, SImode))
17438			action = CONV;
17439		      break;
17440
17441		    case CONST_INT:
17442		      /* MOVS <Rd>,#<imm8>  */
17443		      if (CONST_INT_P (src)
17444			  && IN_RANGE (INTVAL (src), 0, 255))
17445			action = action_for_partial_flag_setting;
17446		      break;
17447
17448		    case REG:
17449		      /* MOVS and MOV<c> with registers have different
17450			 encodings, so are not relevant here.  */
17451		      break;
17452
17453		    default:
17454		      break;
17455		    }
17456		}
17457
17458	      if (action != SKIP)
17459		{
17460		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17461		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17462		  rtvec vec;
17463
17464		  if (action == SWAP_CONV)
17465		    {
17466		      src = copy_rtx (src);
17467		      XEXP (src, 0) = op1;
17468		      XEXP (src, 1) = op0;
17469		      pat = gen_rtx_SET (VOIDmode, dst, src);
17470		      vec = gen_rtvec (2, pat, clobber);
17471		    }
17472		  else /* action == CONV */
17473		    vec = gen_rtvec (2, pat, clobber);
17474
17475		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17476		  INSN_CODE (insn) = -1;
17477		}
17478	    }
17479
17480	  if (NONDEBUG_INSN_P (insn))
17481	    df_simulate_one_insn_backwards (bb, insn, &live);
17482	}
17483    }
17484
17485  CLEAR_REG_SET (&live);
17486}
17487
17488/* GCC puts the pool in the wrong place for ARM, since we can only
17489   load addresses a limited distance around the PC.  We do some
17490   special munging to move the constant pool values to the correct
17491   point in the code.  */
17492static void
17493arm_reorg (void)
17494{
17495  rtx_insn *insn;
17496  HOST_WIDE_INT address = 0;
17497  Mfix * fix;
17498
17499  if (TARGET_THUMB1)
17500    thumb1_reorg ();
17501  else if (TARGET_THUMB2)
17502    thumb2_reorg ();
17503
17504  /* Ensure all insns that must be split have been split at this point.
17505     Otherwise, the pool placement code below may compute incorrect
17506     insn lengths.  Note that when optimizing, all insns have already
17507     been split at this point.  */
17508  if (!optimize)
17509    split_all_insns_noflow ();
17510
17511  minipool_fix_head = minipool_fix_tail = NULL;
17512
17513  /* The first insn must always be a note, or the code below won't
17514     scan it properly.  */
17515  insn = get_insns ();
17516  gcc_assert (NOTE_P (insn));
17517  minipool_pad = 0;
17518
17519  /* Scan all the insns and record the operands that will need fixing.  */
17520  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17521    {
17522      if (BARRIER_P (insn))
17523	push_minipool_barrier (insn, address);
17524      else if (INSN_P (insn))
17525	{
17526	  rtx_jump_table_data *table;
17527
17528	  note_invalid_constants (insn, address, true);
17529	  address += get_attr_length (insn);
17530
17531	  /* If the insn is a vector jump, add the size of the table
17532	     and skip the table.  */
17533	  if (tablejump_p (insn, NULL, &table))
17534	    {
17535	      address += get_jump_table_size (table);
17536	      insn = table;
17537	    }
17538	}
17539      else if (LABEL_P (insn))
17540	/* Add the worst-case padding due to alignment.  We don't add
17541	   the _current_ padding because the minipool insertions
17542	   themselves might change it.  */
17543	address += get_label_padding (insn);
17544    }
17545
17546  fix = minipool_fix_head;
17547
17548  /* Now scan the fixups and perform the required changes.  */
17549  while (fix)
17550    {
17551      Mfix * ftmp;
17552      Mfix * fdel;
17553      Mfix *  last_added_fix;
17554      Mfix * last_barrier = NULL;
17555      Mfix * this_fix;
17556
17557      /* Skip any further barriers before the next fix.  */
17558      while (fix && BARRIER_P (fix->insn))
17559	fix = fix->next;
17560
17561      /* No more fixes.  */
17562      if (fix == NULL)
17563	break;
17564
17565      last_added_fix = NULL;
17566
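      /* Gather as many of the following fixes as will fit into one pool,
	 noting the last natural barrier (if any) passed on the way.  */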
17567      for (ftmp = fix; ftmp; ftmp = ftmp->next)
17568	{
17569	  if (BARRIER_P (ftmp->insn))
17570	    {
17571	      if (ftmp->address >= minipool_vector_head->max_address)
17572		break;
17573
17574	      last_barrier = ftmp;
17575	    }
17576	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17577	    break;
17578
17579	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
17580	}
17581
17582      /* If we found a barrier, drop back to that; any fixes that we
17583	 could have reached but come after the barrier will now go in
17584	 the next mini-pool.  */
17585      if (last_barrier != NULL)
17586	{
17587	  /* Reduce the refcount for those fixes that won't go into this
17588	     pool after all.  */
17589	  for (fdel = last_barrier->next;
17590	       fdel && fdel != ftmp;
17591	       fdel = fdel->next)
17592	    {
17593	      fdel->minipool->refcount--;
17594	      fdel->minipool = NULL;
17595	    }
17596
17597	  ftmp = last_barrier;
17598	}
17599      else
17600        {
17601	  /* FTMP is the first fix that we can't fit into this pool and
17602	     there are no natural barriers that we could use.  Insert a
17603	     new barrier in the code somewhere between the previous
17604	     fix and this one, and arrange to jump around it.  */
17605	  HOST_WIDE_INT max_address;
17606
17607	  /* The last item on the list of fixes must be a barrier, so
17608	     we can never run off the end of the list of fixes without
17609	     last_barrier being set.  */
17610	  gcc_assert (ftmp);
17611
17612	  max_address = minipool_vector_head->max_address;
17613	  /* Check that there isn't another fix that is in range that
17614	     we couldn't fit into this pool because the pool was
17615	     already too large: we need to put the pool before such an
17616	     instruction.  The pool itself may come just after the
17617	     fix because create_fix_barrier also allows space for a
17618	     jump instruction.  */
17619	  if (ftmp->address < max_address)
17620	    max_address = ftmp->address + 1;
17621
17622	  last_barrier = create_fix_barrier (last_added_fix, max_address);
17623	}
17624
17625      assign_minipool_offsets (last_barrier);
17626
17627      while (ftmp)
17628	{
17629	  if (!BARRIER_P (ftmp->insn)
17630	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17631		  == NULL))
17632	    break;
17633
17634	  ftmp = ftmp->next;
17635	}
17636
17637      /* Scan over the fixes we have identified for this pool, fixing them
17638	 up and adding the constants to the pool itself.  */
17639      for (this_fix = fix; this_fix && ftmp != this_fix;
17640	   this_fix = this_fix->next)
17641	if (!BARRIER_P (this_fix->insn))
17642	  {
17643	    rtx addr
17644	      = plus_constant (Pmode,
17645			       gen_rtx_LABEL_REF (VOIDmode,
17646						  minipool_vector_label),
17647			       this_fix->minipool->offset);
17648	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17649	  }
17650
17651      dump_minipool (last_barrier->insn);
17652      fix = ftmp;
17653    }
17654
17655  /* From now on we must synthesize any constants that we can't handle
17656     directly.  This can happen if the RTL gets split during final
17657     instruction generation.  */
17658  cfun->machine->after_arm_reorg = 1;
17659
17660  /* Free the minipool memory.  */
17661  obstack_free (&minipool_obstack, minipool_startobj);
17662}
17663
17664/* Routines to output assembly language.  */
17665
17666/* Return a string representation of the passed-in real value.  */
17667static const char *
17668fp_const_from_val (REAL_VALUE_TYPE *r)
17669{
17670  if (!fp_consts_inited)
17671    init_fp_table ();
17672
17673  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17674  return "0";
17675}
17676
17677/* OPERANDS[0] is the entire list of insns that constitute the pop,
17678   OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
17679   is in the list, UPDATE is true iff the list contains an explicit
17680   update of the base register.  */
17681void
17682arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17683                         bool update)
17684{
17685  int i;
17686  char pattern[100];
17687  int offset;
17688  const char *conditional;
17689  int num_saves = XVECLEN (operands[0], 0);
17690  unsigned int regno;
17691  unsigned int regno_base = REGNO (operands[1]);
17692
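  /* OFFSET is the number of leading elements in the PARALLEL that do not
     correspond to popped registers: the optional base-register update and
     the optional return.  */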
17693  offset = 0;
17694  offset += update ? 1 : 0;
17695  offset += return_pc ? 1 : 0;
17696
17697  /* Is the base register in the list?  */
17698  for (i = offset; i < num_saves; i++)
17699    {
17700      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17701      /* If SP is in the list, then the base register must be SP.  */
17702      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17703      /* If base register is in the list, there must be no explicit update.  */
17704      if (regno == regno_base)
17705        gcc_assert (!update);
17706    }
17707
17708  conditional = reverse ? "%?%D0" : "%?%d0";
17709  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17710    {
17711      /* Output pop (not ldmfd) because it has a shorter encoding.  */
17712      gcc_assert (update);
17713      sprintf (pattern, "pop%s\t{", conditional);
17714    }
17715  else
17716    {
17717      /* Output ldmfd when the base register is SP, otherwise output ldmia.
17718         It's just a convention; their semantics are identical.  */
17719      if (regno_base == SP_REGNUM)
17720        sprintf (pattern, "ldm%sfd\t", conditional);
17721      else if (TARGET_UNIFIED_ASM)
17722        sprintf (pattern, "ldmia%s\t", conditional);
17723      else
17724        sprintf (pattern, "ldm%sia\t", conditional);
17725
17726      strcat (pattern, reg_names[regno_base]);
17727      if (update)
17728        strcat (pattern, "!, {");
17729      else
17730        strcat (pattern, ", {");
17731    }
17732
17733  /* Output the first destination register.  */
17734  strcat (pattern,
17735          reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17736
17737  /* Output the rest of the destination registers.  */
17738  for (i = offset + 1; i < num_saves; i++)
17739    {
17740      strcat (pattern, ", ");
17741      strcat (pattern,
17742              reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17743    }
17744
17745  strcat (pattern, "}");
17746
17747  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17748    strcat (pattern, "^");
17749
17750  output_asm_insn (pattern, &cond);
17751}
17752
17753
17754/* Output the assembly for a store multiple.  */
17755
17756const char *
17757vfp_output_vstmd (rtx * operands)
17758{
17759  char pattern[100];
17760  int p;
17761  int base;
17762  int i;
17763  rtx addr_reg = REG_P (XEXP (operands[0], 0))
17764		   ? XEXP (operands[0], 0)
17765		   : XEXP (XEXP (operands[0], 0), 0);
17766  bool push_p =  REGNO (addr_reg) == SP_REGNUM;
17767
17768  if (push_p)
17769    strcpy (pattern, "vpush%?.64\t{%P1");
17770  else
17771    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17772
17773  p = strlen (pattern);
17774
17775  gcc_assert (REG_P (operands[1]));
17776
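  /* BASE is the D-register number of the first register to be stored; the
     rest of the list is the consecutive D registers that follow it.  */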
17777  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17778  for (i = 1; i < XVECLEN (operands[2], 0); i++)
17779    {
17780      p += sprintf (&pattern[p], ", d%d", base + i);
17781    }
17782  strcpy (&pattern[p], "}");
17783
17784  output_asm_insn (pattern, operands);
17785  return "";
17786}
17787
17788
17789/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
17790   number of bytes pushed.  */
17791
17792static int
17793vfp_emit_fstmd (int base_reg, int count)
17794{
17795  rtx par;
17796  rtx dwarf;
17797  rtx tmp, reg;
17798  int i;
17799
17800  /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17801     register pairs are stored by a store multiple insn.  We avoid this
17802     by pushing an extra pair.  */
17803  if (count == 2 && !arm_arch6)
17804    {
17805      if (base_reg == LAST_VFP_REGNUM - 3)
17806	base_reg -= 2;
17807      count++;
17808    }
17809
17810  /* FSTMD may not store more than 16 doubleword registers at once.  Split
17811     larger stores into multiple parts (up to a maximum of two, in
17812     practice).  */
17813  if (count > 16)
17814    {
17815      int saved;
17816      /* NOTE: base_reg is an internal register number, so each D register
17817         counts as 2.  */
17818      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17819      saved += vfp_emit_fstmd (base_reg, 16);
17820      return saved;
17821    }
17822
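  /* PAR becomes the store-multiple itself; DWARF collects one SET per saved
     register (plus the stack adjustment) and is attached below as a
     REG_FRAME_RELATED_EXPR note so that the unwinder sees each save slot.  */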
17823  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17824  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17825
17826  reg = gen_rtx_REG (DFmode, base_reg);
17827  base_reg += 2;
17828
17829  XVECEXP (par, 0, 0)
17830    = gen_rtx_SET (VOIDmode,
17831		   gen_frame_mem
17832		   (BLKmode,
17833		    gen_rtx_PRE_MODIFY (Pmode,
17834					stack_pointer_rtx,
17835					plus_constant
17836					(Pmode, stack_pointer_rtx,
17837					 - (count * 8)))
17838		    ),
17839		   gen_rtx_UNSPEC (BLKmode,
17840				   gen_rtvec (1, reg),
17841				   UNSPEC_PUSH_MULT));
17842
17843  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17844		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17845  RTX_FRAME_RELATED_P (tmp) = 1;
17846  XVECEXP (dwarf, 0, 0) = tmp;
17847
17848  tmp = gen_rtx_SET (VOIDmode,
17849		     gen_frame_mem (DFmode, stack_pointer_rtx),
17850		     reg);
17851  RTX_FRAME_RELATED_P (tmp) = 1;
17852  XVECEXP (dwarf, 0, 1) = tmp;
17853
17854  for (i = 1; i < count; i++)
17855    {
17856      reg = gen_rtx_REG (DFmode, base_reg);
17857      base_reg += 2;
17858      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17859
17860      tmp = gen_rtx_SET (VOIDmode,
17861			 gen_frame_mem (DFmode,
17862					plus_constant (Pmode,
17863						       stack_pointer_rtx,
17864						       i * 8)),
17865			 reg);
17866      RTX_FRAME_RELATED_P (tmp) = 1;
17867      XVECEXP (dwarf, 0, i + 1) = tmp;
17868    }
17869
17870  par = emit_insn (par);
17871  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17872  RTX_FRAME_RELATED_P (par) = 1;
17873
17874  return count * 8;
17875}
17876
17877/* Emit a call instruction with pattern PAT.  ADDR is the address of
17878   the call target.  */
17879
17880void
17881arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17882{
17883  rtx insn;
17884
17885  insn = emit_call_insn (pat);
17886
17887  /* The PIC register is live on entry to VxWorks PIC PLT entries.
17888     If the call might use such an entry, add a use of the PIC register
17889     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
17890  if (TARGET_VXWORKS_RTP
17891      && flag_pic
17892      && !sibcall
17893      && GET_CODE (addr) == SYMBOL_REF
17894      && (SYMBOL_REF_DECL (addr)
17895	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17896	  : !SYMBOL_REF_LOCAL_P (addr)))
17897    {
17898      require_pic_register ();
17899      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17900    }
17901
17902  if (TARGET_AAPCS_BASED)
17903    {
17904      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17905	 linker.  We need to add an IP clobber to allow setting
17906	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
17907	 is not needed since it's a fixed register.  */
17908      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17909      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17910    }
17911}
17912
17913/* Output a 'call' insn.  */
17914const char *
17915output_call (rtx *operands)
17916{
17917  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
17918
17919  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
17920  if (REGNO (operands[0]) == LR_REGNUM)
17921    {
17922      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17923      output_asm_insn ("mov%?\t%0, %|lr", operands);
17924    }
17925
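  /* Set up the return address.  In ARM state the PC reads as the address of
     the current insn plus 8, so copying it here leaves LR pointing just past
     the branch that follows.  */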
17926  output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17927
17928  if (TARGET_INTERWORK || arm_arch4t)
17929    output_asm_insn ("bx%?\t%0", operands);
17930  else
17931    output_asm_insn ("mov%?\t%|pc, %0", operands);
17932
17933  return "";
17934}
17935
17936/* Output a 'call' insn that is a reference in memory.  This is
17937   disabled on ARMv5 and later, where we prefer a blx instead because
17938   otherwise there's a significant performance overhead.  */
17939const char *
17940output_call_mem (rtx *operands)
17941{
17942  gcc_assert (!arm_arch5);
17943  if (TARGET_INTERWORK)
17944    {
17945      output_asm_insn ("ldr%?\t%|ip, %0", operands);
17946      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17947      output_asm_insn ("bx%?\t%|ip", operands);
17948    }
17949  else if (regno_use_in (LR_REGNUM, operands[0]))
17950    {
17951      /* LR is used in the memory address.  We load the address in the
17952	 first instruction.  It's safe to use IP as the target of the
17953	 load since the call will kill it anyway.  */
17954      output_asm_insn ("ldr%?\t%|ip, %0", operands);
17955      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17956      if (arm_arch4t)
17957	output_asm_insn ("bx%?\t%|ip", operands);
17958      else
17959	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17960    }
17961  else
17962    {
17963      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17964      output_asm_insn ("ldr%?\t%|pc, %0", operands);
17965    }
17966
17967  return "";
17968}
17969
17970
17971/* Output a move of a long double from ARM registers to ARM registers.
17972   OPERANDS[0] is the destination.
17973   OPERANDS[1] is the source.  */
17974const char *
17975output_mov_long_double_arm_from_arm (rtx *operands)
17976{
17977  /* We have to be careful here because the two might overlap.  */
17978  int dest_start = REGNO (operands[0]);
17979  int src_start = REGNO (operands[1]);
17980  rtx ops[2];
17981  int i;
17982
17983  if (dest_start < src_start)
17984    {
17985      for (i = 0; i < 3; i++)
17986	{
17987	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
17988	  ops[1] = gen_rtx_REG (SImode, src_start + i);
17989	  output_asm_insn ("mov%?\t%0, %1", ops);
17990	}
17991    }
17992  else
17993    {
17994      for (i = 2; i >= 0; i--)
17995	{
17996	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
17997	  ops[1] = gen_rtx_REG (SImode, src_start + i);
17998	  output_asm_insn ("mov%?\t%0, %1", ops);
17999	}
18000    }
18001
18002  return "";
18003}
18004
18005void
18006arm_emit_movpair (rtx dest, rtx src)
18007 {
18008  /* If the src is an immediate, simplify it.  */
18009  if (CONST_INT_P (src))
18010    {
18011      HOST_WIDE_INT val = INTVAL (src);
18012      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
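      /* Only emit the upper half (a movt via the ZERO_EXTRACT pattern) when
	 the top 16 bits are nonzero.  */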
18013      if ((val >> 16) & 0x0000ffff)
18014        emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18015                                             GEN_INT (16)),
18016                       GEN_INT ((val >> 16) & 0x0000ffff));
18017      return;
18018    }
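   /* Otherwise emit a HIGH/LO_SUM pair, which the movw/movt patterns pick
      up: movw loads the lower 16 bits and movt then fills in the upper 16.  */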
18019   emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18020   emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18021 }
18022
18023/* Output a move between double words.  It must be REG<-MEM
18024   or MEM<-REG.  */
18025const char *
18026output_move_double (rtx *operands, bool emit, int *count)
18027{
18028  enum rtx_code code0 = GET_CODE (operands[0]);
18029  enum rtx_code code1 = GET_CODE (operands[1]);
18030  rtx otherops[3];
18031  if (count)
18032    *count = 1;
18033
18034  /* The only case when this might happen is when
18035     you are looking at the length of a DImode instruction
18036     that has an invalid constant in it.  */
18037  if (code0 == REG && code1 != MEM)
18038    {
18039      gcc_assert (!emit);
18040      *count = 2;
18041      return "";
18042    }
18043
18044  if (code0 == REG)
18045    {
18046      unsigned int reg0 = REGNO (operands[0]);
18047
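      /* OTHEROPS[0] starts out as the second register of the destination
	 pair; several cases below use it as a temporary, e.g. to hold the
	 address computed by adr in the LABEL_REF/CONST case.  */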
18048      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18049
18050      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
18051
18052      switch (GET_CODE (XEXP (operands[1], 0)))
18053	{
18054	case REG:
18055
18056	  if (emit)
18057	    {
18058	      if (TARGET_LDRD
18059		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18060		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
18061	      else
18062		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18063	    }
18064	  break;
18065
18066	case PRE_INC:
18067	  gcc_assert (TARGET_LDRD);
18068	  if (emit)
18069	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18070	  break;
18071
18072	case PRE_DEC:
18073	  if (emit)
18074	    {
18075	      if (TARGET_LDRD)
18076		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18077	      else
18078		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18079	    }
18080	  break;
18081
18082	case POST_INC:
18083	  if (emit)
18084	    {
18085	      if (TARGET_LDRD)
18086		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18087	      else
18088		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18089	    }
18090	  break;
18091
18092	case POST_DEC:
18093	  gcc_assert (TARGET_LDRD);
18094	  if (emit)
18095	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18096	  break;
18097
18098	case PRE_MODIFY:
18099	case POST_MODIFY:
18100	  /* Autoincrement addressing modes should never have overlapping
18101	     base and destination registers, and overlapping index registers
18102	     are already prohibited, so this doesn't need to worry about
18103	     fix_cm3_ldrd.  */
18104	  otherops[0] = operands[0];
18105	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18106	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18107
18108	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18109	    {
18110	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18111		{
18112		  /* Registers overlap so split out the increment.  */
18113		  if (emit)
18114		    {
18115		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
18116		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18117		    }
18118		  if (count)
18119		    *count = 2;
18120		}
18121	      else
18122		{
18123		  /* Use a single insn if we can.
18124		     FIXME: IWMMXT allows offsets larger than ldrd can
18125		     handle, fix these up with a pair of ldr.  */
18126		  if (TARGET_THUMB2
18127		      || !CONST_INT_P (otherops[2])
18128		      || (INTVAL (otherops[2]) > -256
18129			  && INTVAL (otherops[2]) < 256))
18130		    {
18131		      if (emit)
18132			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18133		    }
18134		  else
18135		    {
18136		      if (emit)
18137			{
18138			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18139			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18140			}
18141		      if (count)
18142			*count = 2;
18143
18144		    }
18145		}
18146	    }
18147	  else
18148	    {
18149	      /* Use a single insn if we can.
18150		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18151		 fix these up with a pair of ldr.  */
18152	      if (TARGET_THUMB2
18153		  || !CONST_INT_P (otherops[2])
18154		  || (INTVAL (otherops[2]) > -256
18155		      && INTVAL (otherops[2]) < 256))
18156		{
18157		  if (emit)
18158		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18159		}
18160	      else
18161		{
18162		  if (emit)
18163		    {
18164		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18165		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18166		    }
18167		  if (count)
18168		    *count = 2;
18169		}
18170	    }
18171	  break;
18172
18173	case LABEL_REF:
18174	case CONST:
18175	  /* We might be able to use ldrd %0, %1 here.  However, the range is
18176	     different from that of ldr/adr, and it is broken on some ARMv7-M
18177	     implementations.  */
18178	  /* Use the second register of the pair to avoid problematic
18179	     overlap.  */
18180	  otherops[1] = operands[1];
18181	  if (emit)
18182	    output_asm_insn ("adr%?\t%0, %1", otherops);
18183	  operands[1] = otherops[0];
18184	  if (emit)
18185	    {
18186	      if (TARGET_LDRD)
18187		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18188	      else
18189		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18190	    }
18191
18192	  if (count)
18193	    *count = 2;
18194	  break;
18195
18196	  /* ??? This needs checking for thumb2.  */
18197	default:
18198	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18199			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18200	    {
18201	      otherops[0] = operands[0];
18202	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18203	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18204
18205	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18206		{
18207		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18208		    {
18209		      switch ((int) INTVAL (otherops[2]))
18210			{
18211			case -8:
18212			  if (emit)
18213			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18214			  return "";
18215			case -4:
18216			  if (TARGET_THUMB2)
18217			    break;
18218			  if (emit)
18219			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18220			  return "";
18221			case 4:
18222			  if (TARGET_THUMB2)
18223			    break;
18224			  if (emit)
18225			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18226			  return "";
18227			}
18228		    }
18229		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18230		  operands[1] = otherops[0];
18231		  if (TARGET_LDRD
18232		      && (REG_P (otherops[2])
18233			  || TARGET_THUMB2
18234			  || (CONST_INT_P (otherops[2])
18235			      && INTVAL (otherops[2]) > -256
18236			      && INTVAL (otherops[2]) < 256)))
18237		    {
18238		      if (reg_overlap_mentioned_p (operands[0],
18239						   otherops[2]))
18240			{
18241			  /* Swap base and index registers over to
18242			     avoid a conflict.  */
18243			  std::swap (otherops[1], otherops[2]);
18244			}
18245		      /* If both registers conflict, it will usually
18246			 have been fixed by a splitter.  */
18247		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
18248			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18249			{
18250			  if (emit)
18251			    {
18252			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
18253			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18254			    }
18255			  if (count)
18256			    *count = 2;
18257			}
18258		      else
18259			{
18260			  otherops[0] = operands[0];
18261			  if (emit)
18262			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18263			}
18264		      return "";
18265		    }
18266
18267		  if (CONST_INT_P (otherops[2]))
18268		    {
18269		      if (emit)
18270			{
18271			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18272			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18273			  else
18274			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
18275			}
18276		    }
18277		  else
18278		    {
18279		      if (emit)
18280			output_asm_insn ("add%?\t%0, %1, %2", otherops);
18281		    }
18282		}
18283	      else
18284		{
18285		  if (emit)
18286		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18287		}
18288
18289	      if (count)
18290		*count = 2;
18291
18292	      if (TARGET_LDRD)
18293		return "ldr%(d%)\t%0, [%1]";
18294
18295	      return "ldm%(ia%)\t%1, %M0";
18296	    }
18297	  else
18298	    {
18299	      otherops[1] = adjust_address (operands[1], SImode, 4);
18300	      /* Take care of overlapping base/data reg.  */
18301	      if (reg_mentioned_p (operands[0], operands[1]))
18302		{
18303		  if (emit)
18304		    {
18305		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18306		      output_asm_insn ("ldr%?\t%0, %1", operands);
18307		    }
18308		  if (count)
18309		    *count = 2;
18310
18311		}
18312	      else
18313		{
18314		  if (emit)
18315		    {
18316		      output_asm_insn ("ldr%?\t%0, %1", operands);
18317		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18318		    }
18319		  if (count)
18320		    *count = 2;
18321		}
18322	    }
18323	}
18324    }
18325  else
18326    {
18327      /* Constraints should ensure this.  */
18328      gcc_assert (code0 == MEM && code1 == REG);
18329      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18330                  || (TARGET_ARM && TARGET_LDRD));
18331
18332      switch (GET_CODE (XEXP (operands[0], 0)))
18333        {
18334	case REG:
18335	  if (emit)
18336	    {
18337	      if (TARGET_LDRD)
18338		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18339	      else
18340		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18341	    }
18342	  break;
18343
18344        case PRE_INC:
18345	  gcc_assert (TARGET_LDRD);
18346	  if (emit)
18347	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18348	  break;
18349
18350        case PRE_DEC:
18351	  if (emit)
18352	    {
18353	      if (TARGET_LDRD)
18354		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18355	      else
18356		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18357	    }
18358	  break;
18359
18360        case POST_INC:
18361	  if (emit)
18362	    {
18363	      if (TARGET_LDRD)
18364		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18365	      else
18366		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18367	    }
18368	  break;
18369
18370        case POST_DEC:
18371	  gcc_assert (TARGET_LDRD);
18372	  if (emit)
18373	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18374	  break;
18375
18376	case PRE_MODIFY:
18377	case POST_MODIFY:
18378	  otherops[0] = operands[1];
18379	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18380	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18381
18382	  /* IWMMXT allows offsets larger than strd can handle;
18383	     fix these up with a pair of str.  */
18384	  if (!TARGET_THUMB2
18385	      && CONST_INT_P (otherops[2])
18386	      && (INTVAL (otherops[2]) <= -256
18387		  || INTVAL (otherops[2]) >= 256))
18388	    {
18389	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18390		{
18391		  if (emit)
18392		    {
18393		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18394		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18395		    }
18396		  if (count)
18397		    *count = 2;
18398		}
18399	      else
18400		{
18401		  if (emit)
18402		    {
18403		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18404		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18405		    }
18406		  if (count)
18407		    *count = 2;
18408		}
18409	    }
18410	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18411	    {
18412	      if (emit)
18413		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18414	    }
18415	  else
18416	    {
18417	      if (emit)
18418		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18419	    }
18420	  break;
18421
18422	case PLUS:
18423	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18424	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18425	    {
18426	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18427		{
18428		case -8:
18429		  if (emit)
18430		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18431		  return "";
18432
18433		case -4:
18434		  if (TARGET_THUMB2)
18435		    break;
18436		  if (emit)
18437		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18438		  return "";
18439
18440		case 4:
18441		  if (TARGET_THUMB2)
18442		    break;
18443		  if (emit)
18444		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18445		  return "";
18446		}
18447	    }
18448	  if (TARGET_LDRD
18449	      && (REG_P (otherops[2])
18450		  || TARGET_THUMB2
18451		  || (CONST_INT_P (otherops[2])
18452		      && INTVAL (otherops[2]) > -256
18453		      && INTVAL (otherops[2]) < 256)))
18454	    {
18455	      otherops[0] = operands[1];
18456	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18457	      if (emit)
18458		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18459	      return "";
18460	    }
18461	  /* Fall through */
18462
18463        default:
18464	  otherops[0] = adjust_address (operands[0], SImode, 4);
18465	  otherops[1] = operands[1];
18466	  if (emit)
18467	    {
18468	      output_asm_insn ("str%?\t%1, %0", operands);
18469	      output_asm_insn ("str%?\t%H1, %0", otherops);
18470	    }
18471	  if (count)
18472	    *count = 2;
18473	}
18474    }
18475
18476  return "";
18477}
18478
18479/* Output a move, load or store for quad-word vectors in ARM registers.  Only
18480   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
18481
18482const char *
18483output_move_quad (rtx *operands)
18484{
18485  if (REG_P (operands[0]))
18486    {
18487      /* Load, or reg->reg move.  */
18488
18489      if (MEM_P (operands[1]))
18490        {
18491          switch (GET_CODE (XEXP (operands[1], 0)))
18492            {
18493            case REG:
18494              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18495              break;
18496
18497            case LABEL_REF:
18498            case CONST:
18499              output_asm_insn ("adr%?\t%0, %1", operands);
18500              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18501              break;
18502
18503            default:
18504              gcc_unreachable ();
18505            }
18506        }
18507      else
18508        {
18509          rtx ops[2];
18510          int dest, src, i;
18511
18512          gcc_assert (REG_P (operands[1]));
18513
18514          dest = REGNO (operands[0]);
18515          src = REGNO (operands[1]);
18516
18517          /* This seems pretty dumb, but hopefully GCC won't try to do it
18518             very often.  */
18519          if (dest < src)
18520            for (i = 0; i < 4; i++)
18521              {
18522                ops[0] = gen_rtx_REG (SImode, dest + i);
18523                ops[1] = gen_rtx_REG (SImode, src + i);
18524                output_asm_insn ("mov%?\t%0, %1", ops);
18525              }
18526          else
18527            for (i = 3; i >= 0; i--)
18528              {
18529                ops[0] = gen_rtx_REG (SImode, dest + i);
18530                ops[1] = gen_rtx_REG (SImode, src + i);
18531                output_asm_insn ("mov%?\t%0, %1", ops);
18532              }
18533        }
18534    }
18535  else
18536    {
18537      gcc_assert (MEM_P (operands[0]));
18538      gcc_assert (REG_P (operands[1]));
18539      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18540
18541      switch (GET_CODE (XEXP (operands[0], 0)))
18542        {
18543        case REG:
18544          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18545          break;
18546
18547        default:
18548          gcc_unreachable ();
18549        }
18550    }
18551
18552  return "";
18553}
18554
18555/* Output a VFP load or store instruction.  */
18556
18557const char *
18558output_move_vfp (rtx *operands)
18559{
18560  rtx reg, mem, addr, ops[2];
18561  int load = REG_P (operands[0]);
18562  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18563  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18564  const char *templ;
18565  char buff[50];
18566  machine_mode mode;
18567
18568  reg = operands[!load];
18569  mem = operands[load];
18570
18571  mode = GET_MODE (reg);
18572
18573  gcc_assert (REG_P (reg));
18574  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18575  gcc_assert (mode == SFmode
18576	      || mode == DFmode
18577	      || mode == SImode
18578	      || mode == DImode
18579              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18580  gcc_assert (MEM_P (mem));
18581
18582  addr = XEXP (mem, 0);
18583
18584  switch (GET_CODE (addr))
18585    {
18586    case PRE_DEC:
18587      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18588      ops[0] = XEXP (addr, 0);
18589      ops[1] = reg;
18590      break;
18591
18592    case POST_INC:
18593      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18594      ops[0] = XEXP (addr, 0);
18595      ops[1] = reg;
18596      break;
18597
18598    default:
18599      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18600      ops[0] = reg;
18601      ops[1] = mem;
18602      break;
18603    }
18604
18605  sprintf (buff, templ,
18606	   load ? "ld" : "st",
18607	   dp ? "64" : "32",
18608	   dp ? "P" : "",
18609	   integer_p ? "\t%@ int" : "");
18610  output_asm_insn (buff, ops);
18611
18612  return "";
18613}
18614
18615/* Output a Neon double-word or quad-word load or store, or a load
18616   or store for larger structure modes.
18617
18618   WARNING: The ordering of elements is weird in big-endian mode,
18619   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
18621   GCC RTL defines element ordering based on in-memory order.
18622   This can be different from the architectural ordering of elements
18623   within a NEON register. The intrinsics defined in arm_neon.h use the
18624   NEON register element ordering, not the GCC RTL element ordering.
18625
   For example, the in-memory ordering of a big-endian quadword
18627   vector with 16-bit elements when stored from register pair {d0,d1}
18628   will be (lowest address first, d0[N] is NEON register element N):
18629
18630     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18631
   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers, starting at rN, in the order:
18634
18635     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18636
   This allows STM/LDM to be used on vectors in ARM registers, and
   produces the same memory layout as if VSTM/VLDM had been used.
18639
18640   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18641   possible, which allows use of appropriate alignment tags.
18642   Note that the choice of "64" is independent of the actual vector
18643   element size; this size simply ensures that the behavior is
18644   equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18645
18646   Due to limitations of those instructions, use of VST1.64/VLD1.64
18647   is not possible if:
18648    - the address contains PRE_DEC, or
18649    - the mode refers to more than 4 double-word registers
18650
18651   In those cases, it would be possible to replace VSTM/VLDM by a
18652   sequence of instructions; this is not currently implemented since
   it is not certain to actually improve performance.  */
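/* As a rough illustration (register numbers are arbitrary examples): a
   quad-word load whose address is a plain register is handled by the REG
   case below and emitted as a single "vld1.64" of the D-register pair,
   whereas the same load from (plus (reg) (const_int 16)) falls through to
   the PLUS case and is emitted as one "vldr" per D register, approximately
   "vldr d0, [r0, #16]" followed by "vldr d1, [r0, #24]".  */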
18654
18655const char *
18656output_move_neon (rtx *operands)
18657{
18658  rtx reg, mem, addr, ops[2];
18659  int regno, nregs, load = REG_P (operands[0]);
18660  const char *templ;
18661  char buff[50];
18662  machine_mode mode;
18663
18664  reg = operands[!load];
18665  mem = operands[load];
18666
18667  mode = GET_MODE (reg);
18668
18669  gcc_assert (REG_P (reg));
18670  regno = REGNO (reg);
18671  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18672  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18673	      || NEON_REGNO_OK_FOR_QUAD (regno));
18674  gcc_assert (VALID_NEON_DREG_MODE (mode)
18675	      || VALID_NEON_QREG_MODE (mode)
18676	      || VALID_NEON_STRUCT_MODE (mode));
18677  gcc_assert (MEM_P (mem));
18678
18679  addr = XEXP (mem, 0);
18680
18681  /* Strip off const from addresses like (const (plus (...))).  */
18682  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18683    addr = XEXP (addr, 0);
18684
18685  switch (GET_CODE (addr))
18686    {
18687    case POST_INC:
18688      /* We have to use vldm / vstm for too-large modes.  */
18689      if (nregs > 4)
18690	{
18691	  templ = "v%smia%%?\t%%0!, %%h1";
18692	  ops[0] = XEXP (addr, 0);
18693	}
18694      else
18695	{
18696	  templ = "v%s1.64\t%%h1, %%A0";
18697	  ops[0] = mem;
18698	}
18699      ops[1] = reg;
18700      break;
18701
18702    case PRE_DEC:
18703      /* We have to use vldm / vstm in this case, since there is no
18704	 pre-decrement form of the vld1 / vst1 instructions.  */
18705      templ = "v%smdb%%?\t%%0!, %%h1";
18706      ops[0] = XEXP (addr, 0);
18707      ops[1] = reg;
18708      break;
18709
18710    case POST_MODIFY:
18711      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
18712      gcc_unreachable ();
18713
18714    case REG:
18715      /* We have to use vldm / vstm for too-large modes.  */
18716      if (nregs > 1)
18717	{
18718	  if (nregs > 4)
18719	    templ = "v%smia%%?\t%%m0, %%h1";
18720	  else
18721	    templ = "v%s1.64\t%%h1, %%A0";
18722
18723	  ops[0] = mem;
18724	  ops[1] = reg;
18725	  break;
18726	}
18727      /* Fall through.  */
18728    case LABEL_REF:
18729    case PLUS:
18730      {
18731	int i;
18732	int overlap = -1;
18733	for (i = 0; i < nregs; i++)
18734	  {
18735	    /* We're only using DImode here because it's a convenient size.  */
18736	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18737	    ops[1] = adjust_address (mem, DImode, 8 * i);
18738	    if (reg_overlap_mentioned_p (ops[0], mem))
18739	      {
18740		gcc_assert (overlap == -1);
18741		overlap = i;
18742	      }
18743	    else
18744	      {
18745		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18746		output_asm_insn (buff, ops);
18747	      }
18748	  }
18749	if (overlap != -1)
18750	  {
18751	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18752	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
18753	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18754	    output_asm_insn (buff, ops);
18755	  }
18756
18757        return "";
18758      }
18759
18760    default:
18761      gcc_unreachable ();
18762    }
18763
18764  sprintf (buff, templ, load ? "ld" : "st");
18765  output_asm_insn (buff, ops);
18766
18767  return "";
18768}
18769
18770/* Compute and return the length of neon_mov<mode>, where <mode> is
18771   one of VSTRUCT modes: EI, OI, CI or XI.  */
18772int
18773arm_attr_length_move_neon (rtx_insn *insn)
18774{
18775  rtx reg, mem, addr;
18776  int load;
18777  machine_mode mode;
18778
18779  extract_insn_cached (insn);
18780
18781  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18782    {
18783      mode = GET_MODE (recog_data.operand[0]);
18784      switch (mode)
18785	{
18786	case EImode:
18787	case OImode:
18788	  return 8;
18789	case CImode:
18790	  return 12;
18791	case XImode:
18792	  return 16;
18793	default:
18794	  gcc_unreachable ();
18795	}
18796    }
18797
18798  load = REG_P (recog_data.operand[0]);
18799  reg = recog_data.operand[!load];
18800  mem = recog_data.operand[load];
18801
18802  gcc_assert (MEM_P (mem));
18803
18804  mode = GET_MODE (reg);
18805  addr = XEXP (mem, 0);
18806
18807  /* Strip off const from addresses like (const (plus (...))).  */
18808  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18809    addr = XEXP (addr, 0);
18810
18811  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18812    {
18813      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18814      return insns * 4;
18815    }
18816  else
18817    return 4;
18818}
18819
18820/* Return nonzero if the offset in the address is an immediate.  Otherwise,
18821   return zero.  */
18822
18823int
18824arm_address_offset_is_imm (rtx_insn *insn)
18825{
18826  rtx mem, addr;
18827
18828  extract_insn_cached (insn);
18829
18830  if (REG_P (recog_data.operand[0]))
18831    return 0;
18832
18833  mem = recog_data.operand[0];
18834
18835  gcc_assert (MEM_P (mem));
18836
18837  addr = XEXP (mem, 0);
18838
18839  if (REG_P (addr)
18840      || (GET_CODE (addr) == PLUS
18841	  && REG_P (XEXP (addr, 0))
18842	  && CONST_INT_P (XEXP (addr, 1))))
18843    return 1;
18844  else
18845    return 0;
18846}
18847
18848/* Output an ADD r, s, #n where n may be too big for one instruction.
   If n is zero and the source and destination registers are the same,
   output nothing.  */
18850const char *
18851output_add_immediate (rtx *operands)
18852{
18853  HOST_WIDE_INT n = INTVAL (operands[2]);
18854
18855  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18856    {
18857      if (n < 0)
18858	output_multi_immediate (operands,
18859				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18860				-n);
18861      else
18862	output_multi_immediate (operands,
18863				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18864				n);
18865    }
18866
18867  return "";
18868}
18869
18870/* Output a multiple immediate operation.
18871   OPERANDS is the vector of operands referred to in the output patterns.
18872   INSTR1 is the output pattern to use for the first constant.
18873   INSTR2 is the output pattern to use for subsequent constants.
18874   IMMED_OP is the index of the constant slot in OPERANDS.
18875   N is the constant value.  */
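/* A worked example (with hypothetical operands): for n == 0x10f the loop
   below peels off the chunks 0x0f and 0x100, each an 8-bit value shifted to
   an even bit position, so with INSTR1 "add%?\t%0, %1, %2" and INSTR2
   "add%?\t%0, %0, %2" the output is approximately "add r0, r1, #15"
   followed by "add r0, r0, #256".  */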
18876static const char *
18877output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18878			int immed_op, HOST_WIDE_INT n)
18879{
18880#if HOST_BITS_PER_WIDE_INT > 32
18881  n &= 0xffffffff;
18882#endif
18883
18884  if (n == 0)
18885    {
18886      /* Quick and easy output.  */
18887      operands[immed_op] = const0_rtx;
18888      output_asm_insn (instr1, operands);
18889    }
18890  else
18891    {
18892      int i;
18893      const char * instr = instr1;
18894
18895      /* Note that n is never zero here (which would give no output).  */
18896      for (i = 0; i < 32; i += 2)
18897	{
18898	  if (n & (3 << i))
18899	    {
18900	      operands[immed_op] = GEN_INT (n & (255 << i));
18901	      output_asm_insn (instr, operands);
18902	      instr = instr2;
18903	      i += 6;
18904	    }
18905	}
18906    }
18907
18908  return "";
18909}
18910
18911/* Return the name of a shifter operation.  */
18912static const char *
arm_shift_nmem (enum rtx_code code)
18914{
18915  switch (code)
18916    {
18917    case ASHIFT:
18918      return ARM_LSL_NAME;
18919
18920    case ASHIFTRT:
18921      return "asr";
18922
18923    case LSHIFTRT:
18924      return "lsr";
18925
18926    case ROTATERT:
18927      return "ror";
18928
18929    default:
      abort ();
18931    }
18932}
18933
18934/* Return the appropriate ARM instruction for the operation code.
18935   The returned result should not be overwritten.  OP is the rtx of the
18936   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18937   was shifted.  */
18938const char *
18939arithmetic_instr (rtx op, int shift_first_arg)
18940{
18941  switch (GET_CODE (op))
18942    {
18943    case PLUS:
18944      return "add";
18945
18946    case MINUS:
18947      return shift_first_arg ? "rsb" : "sub";
18948
18949    case IOR:
18950      return "orr";
18951
18952    case XOR:
18953      return "eor";
18954
18955    case AND:
18956      return "and";
18957
18958    case ASHIFT:
18959    case ASHIFTRT:
18960    case LSHIFTRT:
18961    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));
18963
18964    default:
18965      gcc_unreachable ();
18966    }
18967}
18968
18969/* Ensure valid constant shifts and return the appropriate shift mnemonic
18970   for the operation code.  The returned result should not be overwritten.
18971   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
   will be the constant shift amount.  */
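/* For example (illustrative only): for (ashiftrt (reg) (const_int 3)) this
   returns "asr" with *AMOUNTP set to 3; for (mult (reg) (const_int 8)) it
   returns the LSL mnemonic with *AMOUNTP set to 3; and for a shift whose
   amount is a register it returns the mnemonic with *AMOUNTP set to -1.  */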
18974static const char *
18975shift_op (rtx op, HOST_WIDE_INT *amountp)
18976{
18977  const char * mnem;
18978  enum rtx_code code = GET_CODE (op);
18979
18980  switch (code)
18981    {
18982    case ROTATE:
18983      if (!CONST_INT_P (XEXP (op, 1)))
18984	{
18985	  output_operand_lossage ("invalid shift operand");
18986	  return NULL;
18987	}
18988
18989      code = ROTATERT;
18990      *amountp = 32 - INTVAL (XEXP (op, 1));
18991      mnem = "ror";
18992      break;
18993
18994    case ASHIFT:
18995    case ASHIFTRT:
18996    case LSHIFTRT:
18997    case ROTATERT:
      mnem = arm_shift_nmem (code);
18999      if (CONST_INT_P (XEXP (op, 1)))
19000	{
19001	  *amountp = INTVAL (XEXP (op, 1));
19002	}
19003      else if (REG_P (XEXP (op, 1)))
19004	{
19005	  *amountp = -1;
19006	  return mnem;
19007	}
19008      else
19009	{
19010	  output_operand_lossage ("invalid shift operand");
19011	  return NULL;
19012	}
19013      break;
19014
19015    case MULT:
19016      /* We never have to worry about the amount being other than a
19017	 power of 2, since this case can never be reloaded from a reg.  */
19018      if (!CONST_INT_P (XEXP (op, 1)))
19019	{
19020	  output_operand_lossage ("invalid shift operand");
19021	  return NULL;
19022	}
19023
19024      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19025
19026      /* Amount must be a power of two.  */
19027      if (*amountp & (*amountp - 1))
19028	{
19029	  output_operand_lossage ("invalid shift operand");
19030	  return NULL;
19031	}
19032
19033      *amountp = int_log2 (*amountp);
19034      return ARM_LSL_NAME;
19035
19036    default:
19037      output_operand_lossage ("invalid shift operand");
19038      return NULL;
19039    }
19040
19041  /* This is not 100% correct, but follows from the desire to merge
19042     multiplication by a power of 2 with the recognizer for a
19043     shift.  >=32 is not a valid shift for "lsl", so we must try and
19044     output a shift that produces the correct arithmetical result.
19045     Using lsr #32 is identical except for the fact that the carry bit
19046     is not set correctly if we set the flags; but we never use the
19047     carry bit from such an operation, so we can ignore that.  */
19048  if (code == ROTATERT)
19049    /* Rotate is just modulo 32.  */
19050    *amountp &= 31;
19051  else if (*amountp != (*amountp & 31))
19052    {
19053      if (code == ASHIFT)
19054	mnem = "lsr";
19055      *amountp = 32;
19056    }
19057
19058  /* Shifts of 0 are no-ops.  */
19059  if (*amountp == 0)
19060    return NULL;
19061
19062  return mnem;
19063}
19064
/* Obtain the shift count corresponding to the power of two POWER.  */
19066
19067static HOST_WIDE_INT
19068int_log2 (HOST_WIDE_INT power)
19069{
19070  HOST_WIDE_INT shift = 0;
19071
19072  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19073    {
19074      gcc_assert (shift <= 31);
19075      shift++;
19076    }
19077
19078  return shift;
19079}
19080
/* Output a .ascii pseudo-op, keeping track of lengths and splitting the
   string where necessary, because /bin/as is horribly restrictive about
   line length.  The judgement about
19083   whether or not each character is 'printable' (and can be output as
19084   is) or not (and must be printed with an octal escape) must be made
19085   with reference to the *host* character set -- the situation is
19086   similar to that discussed in the comments above pp_c_char in
19087   c-pretty-print.c.  */
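/* For example (assuming an ASCII host): the four input bytes 'a', '"', 'b'
   and '\n' are emitted as

	.ascii	"a\"b\012"

   since the quote must be backslash-escaped and the newline is not
   printable and so is written as an octal escape.  */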
19088
19089#define MAX_ASCII_LEN 51
19090
19091void
19092output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19093{
19094  int i;
19095  int len_so_far = 0;
19096
19097  fputs ("\t.ascii\t\"", stream);
19098
19099  for (i = 0; i < len; i++)
19100    {
19101      int c = p[i];
19102
19103      if (len_so_far >= MAX_ASCII_LEN)
19104	{
19105	  fputs ("\"\n\t.ascii\t\"", stream);
19106	  len_so_far = 0;
19107	}
19108
19109      if (ISPRINT (c))
19110	{
19111	  if (c == '\\' || c == '\"')
19112	    {
19113	      putc ('\\', stream);
19114	      len_so_far++;
19115	    }
19116	  putc (c, stream);
19117	  len_so_far++;
19118	}
19119      else
19120	{
19121	  fprintf (stream, "\\%03o", c);
19122	  len_so_far += 4;
19123	}
19124    }
19125
19126  fputs ("\"\n", stream);
19127}
19128
/* Whether a register is callee saved or not.  This is necessary because,
   when optimizing for size on Thumb-1 targets, high registers are marked as
   caller saved (to discourage their use) even though they are in fact
   callee saved.  */
19132#define callee_saved_reg_p(reg) \
19133  (!call_used_regs[reg] \
19134   || (TARGET_THUMB1 && optimize_size \
19135       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19136
19137/* Compute the register save mask for registers 0 through 12
19138   inclusive.  This code is used by arm_compute_save_reg_mask.  */
19139
19140static unsigned long
19141arm_compute_save_reg0_reg12_mask (void)
19142{
19143  unsigned long func_type = arm_current_func_type ();
19144  unsigned long save_reg_mask = 0;
19145  unsigned int reg;
19146
19147  if (IS_INTERRUPT (func_type))
19148    {
19149      unsigned int max_reg;
19150      /* Interrupt functions must not corrupt any registers,
19151	 even call clobbered ones.  If this is a leaf function
19152	 we can just examine the registers used by the RTL, but
19153	 otherwise we have to assume that whatever function is
19154	 called might clobber anything, and so we have to save
19155	 all the call-clobbered registers as well.  */
19156      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19157	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
19159	   bank r14 and r15, so we must check up to r12.
19160	   r13 is the stack pointer which is always preserved,
19161	   so we do not need to consider it here.  */
19162	max_reg = 7;
19163      else
19164	max_reg = 12;
19165
19166      for (reg = 0; reg <= max_reg; reg++)
19167	if (df_regs_ever_live_p (reg)
19168	    || (! crtl->is_leaf && call_used_regs[reg]))
19169	  save_reg_mask |= (1 << reg);
19170
19171      /* Also save the pic base register if necessary.  */
19172      if (flag_pic
19173	  && !TARGET_SINGLE_PIC_BASE
19174	  && arm_pic_register != INVALID_REGNUM
19175	  && crtl->uses_pic_offset_table)
19176	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19177    }
  else if (IS_VOLATILE (func_type))
19179    {
19180      /* For noreturn functions we historically omitted register saves
19181	 altogether.  However this really messes up debugging.  As a
19182	 compromise save just the frame pointers.  Combined with the link
19183	 register saved elsewhere this should be sufficient to get
19184	 a backtrace.  */
19185      if (frame_pointer_needed)
19186	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19187      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19188	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19189      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19190	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19191    }
19192  else
19193    {
19194      /* In the normal case we only need to save those registers
19195	 which are call saved and which are used by this function.  */
19196      for (reg = 0; reg <= 11; reg++)
19197	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19198	  save_reg_mask |= (1 << reg);
19199
19200      /* Handle the frame pointer as a special case.  */
19201      if (frame_pointer_needed)
19202	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19203
19204      /* If we aren't loading the PIC register,
19205	 don't stack it even though it may be live.  */
19206      if (flag_pic
19207	  && !TARGET_SINGLE_PIC_BASE
19208	  && arm_pic_register != INVALID_REGNUM
19209	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19210	      || crtl->uses_pic_offset_table))
19211	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19212
19213      /* The prologue will copy SP into R0, so save it.  */
19214      if (IS_STACKALIGN (func_type))
19215	save_reg_mask |= 1;
19216    }
19217
19218  /* Save registers so the exception handler can modify them.  */
19219  if (crtl->calls_eh_return)
19220    {
19221      unsigned int i;
19222
19223      for (i = 0; ; i++)
19224	{
19225	  reg = EH_RETURN_DATA_REGNO (i);
19226	  if (reg == INVALID_REGNUM)
19227	    break;
19228	  save_reg_mask |= 1 << reg;
19229	}
19230    }
19231
19232  return save_reg_mask;
19233}
19234
19235/* Return true if r3 is live at the start of the function.  */
19236
19237static bool
19238arm_r3_live_at_start_p (void)
19239{
19240  /* Just look at cfg info, which is still close enough to correct at this
19241     point.  This gives false positives for broken functions that might use
19242     uninitialized data that happens to be allocated in r3, but who cares?  */
19243  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19244}
19245
19246/* Compute the number of bytes used to store the static chain register on the
19247   stack, above the stack frame.  We need to know this accurately to get the
19248   alignment of the rest of the stack frame correct.  */
19249
19250static int
19251arm_compute_static_chain_stack_bytes (void)
19252{
19253  /* See the defining assertion in arm_expand_prologue.  */
19254  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19255      && IS_NESTED (arm_current_func_type ())
19256      && arm_r3_live_at_start_p ()
19257      && crtl->args.pretend_args_size == 0)
19258    return 4;
19259
19260  return 0;
19261}
19262
19263/* Compute a bit mask of which registers need to be
19264   saved on the stack for the current function.
19265   This is used by arm_get_frame_offsets, which may add extra registers.  */
19266
19267static unsigned long
19268arm_compute_save_reg_mask (void)
19269{
19270  unsigned int save_reg_mask = 0;
19271  unsigned long func_type = arm_current_func_type ();
19272  unsigned int reg;
19273
19274  if (IS_NAKED (func_type))
19275    /* This should never really happen.  */
19276    return 0;
19277
19278  /* If we are creating a stack frame, then we must save the frame pointer,
19279     IP (which will hold the old stack pointer), LR and the PC.  */
19280  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19281    save_reg_mask |=
19282      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19283      | (1 << IP_REGNUM)
19284      | (1 << LR_REGNUM)
19285      | (1 << PC_REGNUM);
19286
19287  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19288
19289  /* Decide if we need to save the link register.
19290     Interrupt routines have their own banked link register,
19291     so they never need to save it.
19292     Otherwise if we do not use the link register we do not need to save
19293     it.  If we are pushing other registers onto the stack however, we
19294     can save an instruction in the epilogue by pushing the link register
19295     now and then popping it back into the PC.  This incurs extra memory
19296     accesses though, so we only do it when optimizing for size, and only
19297     if we know that we will not need a fancy return sequence.  */
19298  if (df_regs_ever_live_p (LR_REGNUM)
19299      || (save_reg_mask
19300	  && optimize_size
19301	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19302	  && !crtl->tail_call_emit
19303	  && !crtl->calls_eh_return))
19304    save_reg_mask |= 1 << LR_REGNUM;
19305
19306  if (cfun->machine->lr_save_eliminated)
19307    save_reg_mask &= ~ (1 << LR_REGNUM);
19308
19309  if (TARGET_REALLY_IWMMXT
19310      && ((bit_count (save_reg_mask)
19311	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ())
19313	   ) % 2) != 0)
19314    {
19315      /* The total number of registers that are going to be pushed
19316	 onto the stack is odd.  We need to ensure that the stack
19317	 is 64-bit aligned before we start to save iWMMXt registers,
19318	 and also before we start to create locals.  (A local variable
19319	 might be a double or long long which we will load/store using
19320	 an iWMMXt instruction).  Therefore we need to push another
19321	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
19323	 used to pass values in a tail call.  */
19324      for (reg = 4; reg <= 12; reg++)
19325	if ((save_reg_mask & (1 << reg)) == 0)
19326	  break;
19327
19328      if (reg <= 12)
19329	save_reg_mask |= (1 << reg);
19330      else
19331	{
19332	  cfun->machine->sibcall_blocked = 1;
19333	  save_reg_mask |= (1 << 3);
19334	}
19335    }
19336
  /* We may need to push an additional register for use in initializing the
19338     PIC base register.  */
19339  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19340      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19341    {
19342      reg = thumb_find_work_register (1 << 4);
19343      if (!call_used_regs[reg])
19344	save_reg_mask |= (1 << reg);
19345    }
19346
19347  return save_reg_mask;
19348}
19349
19350
19351/* Compute a bit mask of which registers need to be
19352   saved on the stack for the current function.  */
19353static unsigned long
19354thumb1_compute_save_reg_mask (void)
19355{
19356  unsigned long mask;
19357  unsigned reg;
19358
19359  mask = 0;
19360  for (reg = 0; reg < 12; reg ++)
19361    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19362      mask |= 1 << reg;
19363
19364  if (flag_pic
19365      && !TARGET_SINGLE_PIC_BASE
19366      && arm_pic_register != INVALID_REGNUM
19367      && crtl->uses_pic_offset_table)
19368    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19369
19370  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
19371  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19372    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19373
19374  /* LR will also be pushed if any lo regs are pushed.  */
19375  if (mask & 0xff || thumb_force_lr_save ())
19376    mask |= (1 << LR_REGNUM);
19377
19378  /* Make sure we have a low work register if we need one.
19379     We will need one if we are going to push a high register,
19380     but we are not currently intending to push a low register.  */
19381  if ((mask & 0xff) == 0
19382      && ((mask & 0x0f00) || TARGET_BACKTRACE))
19383    {
19384      /* Use thumb_find_work_register to choose which register
19385	 we will use.  If the register is live then we will
19386	 have to push it.  Use LAST_LO_REGNUM as our fallback
19387	 choice for the register to select.  */
19388      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19389      /* Make sure the register returned by thumb_find_work_register is
19390	 not part of the return value.  */
19391      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19392	reg = LAST_LO_REGNUM;
19393
19394      if (callee_saved_reg_p (reg))
19395	mask |= 1 << reg;
19396    }
19397
19398  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here whether they will be present or not,
     so we have to play it safe and assume that they are.  */
19401  if ((CALLER_INTERWORKING_SLOT_SIZE +
19402       ROUND_UP_WORD (get_frame_size ()) +
19403       crtl->outgoing_args_size) >= 504)
19404    {
19405      /* This is the same as the code in thumb1_expand_prologue() which
19406	 determines which register to use for stack decrement. */
19407      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19408	if (mask & (1 << reg))
19409	  break;
19410
19411      if (reg > LAST_LO_REGNUM)
19412	{
19413	  /* Make sure we have a register available for stack decrement. */
19414	  mask |= 1 << LAST_LO_REGNUM;
19415	}
19416    }
19417
19418  return mask;
19419}
19420
19421
19422/* Return the number of bytes required to save VFP registers.  */
19423static int
19424arm_get_vfp_saved_size (void)
19425{
19426  unsigned int regno;
19427  int count;
19428  int saved;
19429
19430  saved = 0;
19431  /* Space for saved VFP registers.  */
19432  if (TARGET_HARD_FLOAT && TARGET_VFP)
19433    {
19434      count = 0;
19435      for (regno = FIRST_VFP_REGNUM;
19436	   regno < LAST_VFP_REGNUM;
19437	   regno += 2)
19438	{
19439	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19440	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19441	    {
19442	      if (count > 0)
19443		{
19444		  /* Workaround ARM10 VFPr1 bug.  */
19445		  if (count == 2 && !arm_arch6)
19446		    count++;
19447		  saved += count * 8;
19448		}
19449	      count = 0;
19450	    }
19451	  else
19452	    count++;
19453	}
19454      if (count > 0)
19455	{
19456	  if (count == 2 && !arm_arch6)
19457	    count++;
19458	  saved += count * 8;
19459	}
19460    }
19461  return saved;
19462}
19463
19464
19465/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in
   RTL.  */
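/* For example (a hypothetical frame, unified assembly, no interworking): a
   normal function whose saved registers are {r4, lr} and which is really
   returning emits roughly "pop {r4, pc}", while the same function with only
   LR saved emits roughly "ldr pc, [sp], #4".  */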
19468const char *
19469output_return_instruction (rtx operand, bool really_return, bool reverse,
19470                           bool simple_return)
19471{
19472  char conditional[10];
19473  char instr[100];
19474  unsigned reg;
19475  unsigned long live_regs_mask;
19476  unsigned long func_type;
19477  arm_stack_offsets *offsets;
19478
19479  func_type = arm_current_func_type ();
19480
19481  if (IS_NAKED (func_type))
19482    return "";
19483
19484  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19485    {
19486      /* If this function was declared non-returning, and we have
19487	 found a tail call, then we have to trust that the called
19488	 function won't return.  */
19489      if (really_return)
19490	{
19491	  rtx ops[2];
19492
19493	  /* Otherwise, trap an attempted return by aborting.  */
19494	  ops[0] = operand;
19495	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19496				       : "abort");
19497	  assemble_external_libcall (ops[1]);
19498	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19499	}
19500
19501      return "";
19502    }
19503
19504  gcc_assert (!cfun->calls_alloca || really_return);
19505
19506  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19507
19508  cfun->machine->return_used_this_function = 1;
19509
19510  offsets = arm_get_frame_offsets ();
19511  live_regs_mask = offsets->saved_regs_mask;
19512
19513  if (!simple_return && live_regs_mask)
19514    {
19515      const char * return_reg;
19516
19517      /* If we do not have any special requirements for function exit
19518	 (e.g. interworking) then we can load the return address
19519	 directly into the PC.  Otherwise we must load it into LR.  */
19520      if (really_return
19521	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19522	return_reg = reg_names[PC_REGNUM];
19523      else
19524	return_reg = reg_names[LR_REGNUM];
19525
19526      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19527	{
19528	  /* There are three possible reasons for the IP register
19529	     being saved.  1) a stack frame was created, in which case
19530	     IP contains the old stack pointer, or 2) an ISR routine
19531	     corrupted it, or 3) it was saved to align the stack on
19532	     iWMMXt.  In case 1, restore IP into SP, otherwise just
19533	     restore IP.  */
19534	  if (frame_pointer_needed)
19535	    {
19536	      live_regs_mask &= ~ (1 << IP_REGNUM);
19537	      live_regs_mask |=   (1 << SP_REGNUM);
19538	    }
19539	  else
19540	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19541	}
19542
19543      /* On some ARM architectures it is faster to use LDR rather than
19544	 LDM to load a single register.  On other architectures, the
19545	 cost is the same.  In 26 bit mode, or for exception handlers,
19546	 we have to use LDM to load the PC so that the CPSR is also
19547	 restored.  */
19548      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19549	if (live_regs_mask == (1U << reg))
19550	  break;
19551
19552      if (reg <= LAST_ARM_REGNUM
19553	  && (reg != LR_REGNUM
19554	      || ! really_return
19555	      || ! IS_INTERRUPT (func_type)))
19556	{
19557	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19558		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19559	}
19560      else
19561	{
19562	  char *p;
19563	  int first = 1;
19564
19565	  /* Generate the load multiple instruction to restore the
19566	     registers.  Note we can get here, even if
19567	     frame_pointer_needed is true, but only if sp already
19568	     points to the base of the saved core registers.  */
19569	  if (live_regs_mask & (1 << SP_REGNUM))
19570	    {
19571	      unsigned HOST_WIDE_INT stack_adjust;
19572
19573	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19574	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19575
	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
19581	      else
19582		{
19583		  /* If we can't use ldmib (SA110 bug),
19584		     then try to pop r3 instead.  */
19585		  if (stack_adjust)
19586		    live_regs_mask |= 1 << 3;
19587
19588		  if (TARGET_UNIFIED_ASM)
19589		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19590		  else
19591		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19592		}
19593	    }
19594	  else
19595	    if (TARGET_UNIFIED_ASM)
19596	      sprintf (instr, "pop%s\t{", conditional);
19597	    else
19598	      sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19599
19600	  p = instr + strlen (instr);
19601
19602	  for (reg = 0; reg <= SP_REGNUM; reg++)
19603	    if (live_regs_mask & (1 << reg))
19604	      {
19605		int l = strlen (reg_names[reg]);
19606
19607		if (first)
19608		  first = 0;
19609		else
19610		  {
19611		    memcpy (p, ", ", 2);
19612		    p += 2;
19613		  }
19614
19615		memcpy (p, "%|", 2);
19616		memcpy (p + 2, reg_names[reg], l);
19617		p += l + 2;
19618	      }
19619
19620	  if (live_regs_mask & (1 << LR_REGNUM))
19621	    {
19622	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19623	      /* If returning from an interrupt, restore the CPSR.  */
19624	      if (IS_INTERRUPT (func_type))
19625		strcat (p, "^");
19626	    }
19627	  else
19628	    strcpy (p, "}");
19629	}
19630
19631      output_asm_insn (instr, & operand);
19632
19633      /* See if we need to generate an extra instruction to
19634	 perform the actual function return.  */
19635      if (really_return
19636	  && func_type != ARM_FT_INTERWORKED
19637	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19638	{
19639	  /* The return has already been handled
19640	     by loading the LR into the PC.  */
19641          return "";
19642	}
19643    }
19644
19645  if (really_return)
19646    {
19647      switch ((int) ARM_FUNC_TYPE (func_type))
19648	{
19649	case ARM_FT_ISR:
19650	case ARM_FT_FIQ:
19651	  /* ??? This is wrong for unified assembly syntax.  */
19652	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19653	  break;
19654
19655	case ARM_FT_INTERWORKED:
19656	  sprintf (instr, "bx%s\t%%|lr", conditional);
19657	  break;
19658
19659	case ARM_FT_EXCEPTION:
19660	  /* ??? This is wrong for unified assembly syntax.  */
19661	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19662	  break;
19663
19664	default:
19665	  /* Use bx if it's available.  */
19666	  if (arm_arch5 || arm_arch4t)
19667	    sprintf (instr, "bx%s\t%%|lr", conditional);
19668	  else
19669	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19670	  break;
19671	}
19672
19673      output_asm_insn (instr, & operand);
19674    }
19675
19676  return "";
19677}
19678
19679/* Write the function name into the code section, directly preceding
19680   the function prologue.
19681
19682   Code will be output similar to this:
19683     t0
19684	 .ascii "arm_poke_function_name", 0
19685	 .align
19686     t1
19687	 .word 0xff000000 + (t1 - t0)
19688     arm_poke_function_name
19689	 mov     ip, sp
19690	 stmfd   sp!, {fp, ip, lr, pc}
19691	 sub     fp, ip, #4
19692
19693   When performing a stack backtrace, code can inspect the value
19694   of 'pc' stored at 'fp' + 0.  If the trace function then looks
19695   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & ~0xff000000).
19698
19699   We assume that pc is declared as a pointer to an unsigned long.
19700
19701   It is of no benefit to output the function name if we are assembling
19702   a leaf function.  These function types will not contain a stack
   backtrace structure; therefore it is not possible to determine the
19704   function name.  */
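/* As a concrete example of the word at t1: for the 22-character name shown
   above, length is 23 (including the trailing NUL) and alignlength rounds
   up to 24, so the word emitted is 0xff000018.  */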
19705void
19706arm_poke_function_name (FILE *stream, const char *name)
19707{
19708  unsigned long alignlength;
19709  unsigned long length;
19710  rtx           x;
19711
19712  length      = strlen (name) + 1;
19713  alignlength = ROUND_UP_WORD (length);
19714
19715  ASM_OUTPUT_ASCII (stream, name, length);
19716  ASM_OUTPUT_ALIGN (stream, 2);
19717  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19718  assemble_aligned_integer (UNITS_PER_WORD, x);
19719}
19720
19721/* Place some comments into the assembler stream
19722   describing the current function.  */
19723static void
19724arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19725{
19726  unsigned long func_type;
19727
19728  /* ??? Do we want to print some of the below anyway?  */
19729  if (TARGET_THUMB1)
19730    return;
19731
19732  /* Sanity check.  */
19733  gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19734
19735  func_type = arm_current_func_type ();
19736
19737  switch ((int) ARM_FUNC_TYPE (func_type))
19738    {
19739    default:
19740    case ARM_FT_NORMAL:
19741      break;
19742    case ARM_FT_INTERWORKED:
19743      asm_fprintf (f, "\t%@ Function supports interworking.\n");
19744      break;
19745    case ARM_FT_ISR:
19746      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19747      break;
19748    case ARM_FT_FIQ:
19749      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19750      break;
19751    case ARM_FT_EXCEPTION:
19752      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19753      break;
19754    }
19755
19756  if (IS_NAKED (func_type))
19757    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19758
19759  if (IS_VOLATILE (func_type))
19760    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19761
19762  if (IS_NESTED (func_type))
19763    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19764  if (IS_STACKALIGN (func_type))
19765    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19766
19767  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19768	       crtl->args.size,
19769	       crtl->args.pretend_args_size, frame_size);
19770
19771  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19772	       frame_pointer_needed,
19773	       cfun->machine->uses_anonymous_args);
19774
19775  if (cfun->machine->lr_save_eliminated)
19776    asm_fprintf (f, "\t%@ link register save eliminated.\n");
19777
19778  if (crtl->calls_eh_return)
19779    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19780
19781}
19782
19783static void
19784arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19785			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19786{
19787  arm_stack_offsets *offsets;
19788
19789  if (TARGET_THUMB1)
19790    {
19791      int regno;
19792
19793      /* Emit any call-via-reg trampolines that are needed for v4t support
19794	 of call_reg and call_value_reg type insns.  */
19795      for (regno = 0; regno < LR_REGNUM; regno++)
19796	{
19797	  rtx label = cfun->machine->call_via[regno];
19798
19799	  if (label != NULL)
19800	    {
19801	      switch_to_section (function_section (current_function_decl));
19802	      targetm.asm_out.internal_label (asm_out_file, "L",
19803					      CODE_LABEL_NUMBER (label));
19804	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19805	    }
19806	}
19807
19808      /* ??? Probably not safe to set this here, since it assumes that a
19809	 function will be emitted as assembly immediately after we generate
19810	 RTL for it.  This does not happen for inline functions.  */
19811      cfun->machine->return_used_this_function = 0;
19812    }
19813  else /* TARGET_32BIT */
19814    {
19815      /* We need to take into account any stack-frame rounding.  */
19816      offsets = arm_get_frame_offsets ();
19817
19818      gcc_assert (!use_return_insn (FALSE, NULL)
19819		  || (cfun->machine->return_used_this_function != 0)
19820		  || offsets->saved_regs == offsets->outgoing_args
19821		  || frame_pointer_needed);
19822    }
19823}
19824
19825/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers is being pushed, an STRD
   pattern is created for each register pair.  If an odd number of
   registers is pushed, emit an initial STR followed by
19829   as many STRD instructions as are needed.  This works best when the
19830   stack is initially 64-bit aligned (the normal case), since it
19831   ensures that each STRD is also 64-bit aligned.  */
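/* For example (with a hypothetical register set): pushing {r4, r5, r6}
   emits insns roughly equivalent to "str r4, [sp, #-12]!", which allocates
   the whole area and keeps the following store dword-aligned, and then
   "strd r5, r6, [sp, #4]", giving the same layout as "push {r4, r5, r6}".  */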
19832static void
19833thumb2_emit_strd_push (unsigned long saved_regs_mask)
19834{
19835  int num_regs = 0;
19836  int i;
19837  int regno;
19838  rtx par = NULL_RTX;
19839  rtx dwarf = NULL_RTX;
19840  rtx tmp;
19841  bool first = true;
19842
19843  num_regs = bit_count (saved_regs_mask);
19844
19845  /* Must be at least one register to save, and can't save SP or PC.  */
19846  gcc_assert (num_regs > 0 && num_regs <= 14);
19847  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19848  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19849
19850  /* Create sequence for DWARF info.  All the frame-related data for
19851     debugging is held in this wrapper.  */
19852  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19853
19854  /* Describe the stack adjustment.  */
19855  tmp = gen_rtx_SET (VOIDmode,
19856		      stack_pointer_rtx,
19857		      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19858  RTX_FRAME_RELATED_P (tmp) = 1;
19859  XVECEXP (dwarf, 0, 0) = tmp;
19860
19861  /* Find the first register.  */
19862  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19863    ;
19864
19865  i = 0;
19866
  /* If there's an odd number of registers to push, start off by
19868     pushing a single register.  This ensures that subsequent strd
19869     operations are dword aligned (assuming that SP was originally
19870     64-bit aligned).  */
19871  if ((num_regs & 1) != 0)
19872    {
19873      rtx reg, mem, insn;
19874
19875      reg = gen_rtx_REG (SImode, regno);
19876      if (num_regs == 1)
19877	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19878						     stack_pointer_rtx));
19879      else
19880	mem = gen_frame_mem (Pmode,
19881			     gen_rtx_PRE_MODIFY
19882			     (Pmode, stack_pointer_rtx,
19883			      plus_constant (Pmode, stack_pointer_rtx,
19884					     -4 * num_regs)));
19885
19886      tmp = gen_rtx_SET (VOIDmode, mem, reg);
19887      RTX_FRAME_RELATED_P (tmp) = 1;
19888      insn = emit_insn (tmp);
19889      RTX_FRAME_RELATED_P (insn) = 1;
19890      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19891      tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19892			 reg);
19893      RTX_FRAME_RELATED_P (tmp) = 1;
19894      i++;
19895      regno++;
19896      XVECEXP (dwarf, 0, i) = tmp;
19897      first = false;
19898    }
19899
19900  while (i < num_regs)
19901    if (saved_regs_mask & (1 << regno))
19902      {
19903	rtx reg1, reg2, mem1, mem2;
19904	rtx tmp0, tmp1, tmp2;
19905	int regno2;
19906
19907	/* Find the register to pair with this one.  */
19908	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19909	     regno2++)
19910	  ;
19911
19912	reg1 = gen_rtx_REG (SImode, regno);
19913	reg2 = gen_rtx_REG (SImode, regno2);
19914
19915	if (first)
19916	  {
19917	    rtx insn;
19918
19919	    first = false;
19920	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19921							stack_pointer_rtx,
19922							-4 * num_regs));
19923	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19924							stack_pointer_rtx,
19925							-4 * (num_regs - 1)));
19926	    tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19927				plus_constant (Pmode, stack_pointer_rtx,
19928					       -4 * (num_regs)));
19929	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19930	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19931	    RTX_FRAME_RELATED_P (tmp0) = 1;
19932	    RTX_FRAME_RELATED_P (tmp1) = 1;
19933	    RTX_FRAME_RELATED_P (tmp2) = 1;
19934	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19935	    XVECEXP (par, 0, 0) = tmp0;
19936	    XVECEXP (par, 0, 1) = tmp1;
19937	    XVECEXP (par, 0, 2) = tmp2;
19938	    insn = emit_insn (par);
19939	    RTX_FRAME_RELATED_P (insn) = 1;
19940	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19941	  }
19942	else
19943	  {
19944	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19945							stack_pointer_rtx,
19946							4 * i));
19947	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19948							stack_pointer_rtx,
19949							4 * (i + 1)));
19950	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19951	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19952	    RTX_FRAME_RELATED_P (tmp1) = 1;
19953	    RTX_FRAME_RELATED_P (tmp2) = 1;
19954	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19955	    XVECEXP (par, 0, 0) = tmp1;
19956	    XVECEXP (par, 0, 1) = tmp2;
19957	    emit_insn (par);
19958	  }
19959
19960	/* Create unwind information.  This is an approximation.  */
19961	tmp1 = gen_rtx_SET (VOIDmode,
19962			    gen_frame_mem (Pmode,
19963					   plus_constant (Pmode,
19964							  stack_pointer_rtx,
19965							  4 * i)),
19966			    reg1);
19967	tmp2 = gen_rtx_SET (VOIDmode,
19968			    gen_frame_mem (Pmode,
19969					   plus_constant (Pmode,
19970							  stack_pointer_rtx,
19971							  4 * (i + 1))),
19972			    reg2);
19973
19974	RTX_FRAME_RELATED_P (tmp1) = 1;
19975	RTX_FRAME_RELATED_P (tmp2) = 1;
19976	XVECEXP (dwarf, 0, i + 1) = tmp1;
19977	XVECEXP (dwarf, 0, i + 2) = tmp2;
19978	i += 2;
19979	regno = regno2 + 1;
19980      }
19981    else
19982      regno++;
19983
19984  return;
19985}
19986
19987/* STRD in ARM mode requires consecutive registers.  This function emits STRD
19988   whenever possible, otherwise it emits single-word stores.  The first store
19989   also allocates stack space for all saved registers, using writeback with
19990   post-addressing mode.  All other stores use offset addressing.  If no STRD
19991   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores give the scheduler
   more freedom and can be turned into an STM by peephole optimizations.  */
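/* For example (with a hypothetical register set): pushing {r4, r5, r7}
   emits insns roughly equivalent to "strd r4, r5, [sp, #-12]!", whose
   writeback allocates space for all three registers, followed by
   "str r7, [sp, #8]", matching the layout of "push {r4, r5, r7}".  */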
19994static void
19995arm_emit_strd_push (unsigned long saved_regs_mask)
19996{
19997  int num_regs = 0;
19998  int i, j, dwarf_index  = 0;
19999  int offset = 0;
20000  rtx dwarf = NULL_RTX;
20001  rtx insn = NULL_RTX;
20002  rtx tmp, mem;
20003
  /* TODO: More efficient code can be emitted by changing the
20005     layout, e.g., first push all pairs that can use STRD to keep the
20006     stack aligned, and then push all other registers.  */
20007  for (i = 0; i <= LAST_ARM_REGNUM; i++)
20008    if (saved_regs_mask & (1 << i))
20009      num_regs++;
20010
20011  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20012  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20013  gcc_assert (num_regs > 0);
20014
20015  /* Create sequence for DWARF info.  */
20016  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20017
20018  /* For dwarf info, we generate explicit stack update.  */
20019  tmp = gen_rtx_SET (VOIDmode,
20020                     stack_pointer_rtx,
20021                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20022  RTX_FRAME_RELATED_P (tmp) = 1;
20023  XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20024
20025  /* Save registers.  */
20026  offset = - 4 * num_regs;
20027  j = 0;
20028  while (j <= LAST_ARM_REGNUM)
20029    if (saved_regs_mask & (1 << j))
20030      {
20031        if ((j % 2 == 0)
20032            && (saved_regs_mask & (1 << (j + 1))))
20033          {
            /* The current register and the next register form a register
               pair for which STRD can be generated.  */
20036            if (offset < 0)
20037              {
20038                /* Allocate stack space for all saved registers.  */
20039                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20040                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20041                mem = gen_frame_mem (DImode, tmp);
20042                offset = 0;
20043              }
20044            else if (offset > 0)
20045              mem = gen_frame_mem (DImode,
20046                                   plus_constant (Pmode,
20047                                                  stack_pointer_rtx,
20048                                                  offset));
20049            else
20050              mem = gen_frame_mem (DImode, stack_pointer_rtx);
20051
20052            tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
20053            RTX_FRAME_RELATED_P (tmp) = 1;
20054            tmp = emit_insn (tmp);
20055
20056            /* Record the first store insn.  */
20057            if (dwarf_index == 1)
20058              insn = tmp;
20059
20060            /* Generate dwarf info.  */
20061            mem = gen_frame_mem (SImode,
20062                                 plus_constant (Pmode,
20063                                                stack_pointer_rtx,
20064                                                offset));
20065            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20066            RTX_FRAME_RELATED_P (tmp) = 1;
20067            XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20068
20069            mem = gen_frame_mem (SImode,
20070                                 plus_constant (Pmode,
20071                                                stack_pointer_rtx,
20072                                                offset + 4));
20073            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20074            RTX_FRAME_RELATED_P (tmp) = 1;
20075            XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20076
20077            offset += 8;
20078            j += 2;
20079          }
20080        else
20081          {
20082            /* Emit a single word store.  */
20083            if (offset < 0)
20084              {
20085                /* Allocate stack space for all saved registers.  */
20086                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20087                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20088                mem = gen_frame_mem (SImode, tmp);
20089                offset = 0;
20090              }
20091            else if (offset > 0)
20092              mem = gen_frame_mem (SImode,
20093                                   plus_constant (Pmode,
20094                                                  stack_pointer_rtx,
20095                                                  offset));
20096            else
20097              mem = gen_frame_mem (SImode, stack_pointer_rtx);
20098
20099            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20100            RTX_FRAME_RELATED_P (tmp) = 1;
20101            tmp = emit_insn (tmp);
20102
20103            /* Record the first store insn.  */
20104            if (dwarf_index == 1)
20105              insn = tmp;
20106
20107            /* Generate dwarf info.  */
20108            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
20110                                               stack_pointer_rtx,
20111                                               offset));
20112            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20113            RTX_FRAME_RELATED_P (tmp) = 1;
20114            XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20115
20116            offset += 4;
20117            j += 1;
20118          }
20119      }
20120    else
20121      j++;
20122
20123  /* Attach dwarf info to the first insn we generate.  */
20124  gcc_assert (insn != NULL_RTX);
20125  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20126  RTX_FRAME_RELATED_P (insn) = 1;
20127}
20128
20129/* Generate and emit an insn that we will recognize as a push_multi.
20130   Unfortunately, since this insn does not reflect very well the actual
20131   semantics of the operation, we need to annotate the insn for the benefit
20132   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
20133   MASK for registers that should be annotated for DWARF2 frame unwind
20134   information.  */
20135static rtx
20136emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20137{
20138  int num_regs = 0;
20139  int num_dwarf_regs = 0;
20140  int i, j;
20141  rtx par;
20142  rtx dwarf;
20143  int dwarf_par_index;
20144  rtx tmp, reg;
20145
20146  /* We don't record the PC in the dwarf frame information.  */
20147  dwarf_regs_mask &= ~(1 << PC_REGNUM);
20148
20149  for (i = 0; i <= LAST_ARM_REGNUM; i++)
20150    {
20151      if (mask & (1 << i))
20152	num_regs++;
20153      if (dwarf_regs_mask & (1 << i))
20154	num_dwarf_regs++;
20155    }
20156
20157  gcc_assert (num_regs && num_regs <= 16);
20158  gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20159
20160  /* For the body of the insn we are going to generate an UNSPEC in
20161     parallel with several USEs.  This allows the insn to be recognized
20162     by the push_multi pattern in the arm.md file.
20163
20164     The body of the insn looks something like this:
20165
20166       (parallel [
20167           (set (mem:BLK (pre_modify:SI (reg:SI sp)
20168	                                (const_int:SI <num>)))
20169	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20170           (use (reg:SI XX))
20171           (use (reg:SI YY))
20172	   ...
20173        ])
20174
20175     For the frame note however, we try to be more explicit and actually
20176     show each register being stored into the stack frame, plus a (single)
20177     decrement of the stack pointer.  We do it this way in order to be
20178     friendly to the stack unwinding code, which only wants to see a single
20179     stack decrement per instruction.  The RTL we generate for the note looks
20180     something like this:
20181
20182      (sequence [
20183           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20184           (set (mem:SI (reg:SI sp)) (reg:SI r4))
20185           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20186           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20187	   ...
20188        ])
20189
20190     FIXME:: In an ideal world the PRE_MODIFY would not exist and
20191     instead we'd have a parallel expression detailing all
20192     the stores to the various memory addresses so that debug
20193     information is more up-to-date. Remember however while writing
20194     this to take care of the constraints with the push instruction.
20195
20196     Note also that this has to be taken care of for the VFP registers.
20197
20198     For more see PR43399.  */
20199
20200  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20201  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20202  dwarf_par_index = 1;
20203
20204  for (i = 0; i <= LAST_ARM_REGNUM; i++)
20205    {
20206      if (mask & (1 << i))
20207	{
20208	  reg = gen_rtx_REG (SImode, i);
20209
20210	  XVECEXP (par, 0, 0)
20211	    = gen_rtx_SET (VOIDmode,
20212			   gen_frame_mem
20213			   (BLKmode,
20214			    gen_rtx_PRE_MODIFY (Pmode,
20215						stack_pointer_rtx,
20216						plus_constant
20217						(Pmode, stack_pointer_rtx,
20218						 -4 * num_regs))
20219			    ),
20220			   gen_rtx_UNSPEC (BLKmode,
20221					   gen_rtvec (1, reg),
20222					   UNSPEC_PUSH_MULT));
20223
20224	  if (dwarf_regs_mask & (1 << i))
20225	    {
20226	      tmp = gen_rtx_SET (VOIDmode,
20227				 gen_frame_mem (SImode, stack_pointer_rtx),
20228				 reg);
20229	      RTX_FRAME_RELATED_P (tmp) = 1;
20230	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20231	    }
20232
20233	  break;
20234	}
20235    }
20236
20237  for (j = 1, i++; j < num_regs; i++)
20238    {
20239      if (mask & (1 << i))
20240	{
20241	  reg = gen_rtx_REG (SImode, i);
20242
20243	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20244
20245	  if (dwarf_regs_mask & (1 << i))
20246	    {
20247	      tmp
20248		= gen_rtx_SET (VOIDmode,
20249			       gen_frame_mem
20250			       (SImode,
20251				plus_constant (Pmode, stack_pointer_rtx,
20252					       4 * j)),
20253			       reg);
20254	      RTX_FRAME_RELATED_P (tmp) = 1;
20255	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20256	    }
20257
20258	  j++;
20259	}
20260    }
20261
20262  par = emit_insn (par);
20263
20264  tmp = gen_rtx_SET (VOIDmode,
20265		     stack_pointer_rtx,
20266		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20267  RTX_FRAME_RELATED_P (tmp) = 1;
20268  XVECEXP (dwarf, 0, 0) = tmp;
20269
20270  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20271
20272  return par;
20273}
20274
20275/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20276   SIZE is the offset to be adjusted.
20277   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
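/* For instance, after a pop of four core registers the epilogue code below
   calls this with SIZE == 16 and DEST == SRC == stack_pointer_rtx, so the
   unwinder sees the CFA recomputed from the incremented SP.  (Illustrative
   example only; callers may also pass the hard frame pointer.)  */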
20278static void
20279arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20280{
20281  rtx dwarf;
20282
20283  RTX_FRAME_RELATED_P (insn) = 1;
20284  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20285  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20286}
20287
20288/* Generate and emit an insn pattern that we will recognize as a pop_multi.
20289   SAVED_REGS_MASK shows which registers need to be restored.
20290
20291   Unfortunately, since this insn does not reflect very well the actual
20292   semantics of the operation, we need to annotate the insn for the benefit
20293   of DWARF2 frame unwind information.  */
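/* For example (an illustrative sketch): a SAVED_REGS_MASK covering
   {r4, r5, r6} produces the equivalent of "pop {r4, r5, r6}" -- a PARALLEL
   containing the SP increment by 12 and the three register loads --
   annotated with REG_CFA_RESTORE notes for r4, r5 and r6 and a
   REG_CFA_ADJUST_CFA note for the 12-byte stack adjustment.  */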
20294static void
20295arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20296{
20297  int num_regs = 0;
20298  int i, j;
20299  rtx par;
20300  rtx dwarf = NULL_RTX;
20301  rtx tmp, reg;
20302  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20303  int offset_adj;
20304  int emit_update;
20305
20306  offset_adj = return_in_pc ? 1 : 0;
20307  for (i = 0; i <= LAST_ARM_REGNUM; i++)
20308    if (saved_regs_mask & (1 << i))
20309      num_regs++;
20310
20311  gcc_assert (num_regs && num_regs <= 16);
20312
  /* If SP is in the register list, then we don't emit the SP update insn.  */
20314  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20315
20316  /* The parallel needs to hold num_regs SETs
20317     and one SET for the stack update.  */
20318  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20319
20320  if (return_in_pc)
20321    XVECEXP (par, 0, 0) = ret_rtx;
20322
20323  if (emit_update)
20324    {
20325      /* Increment the stack pointer, based on there being
20326         num_regs 4-byte registers to restore.  */
20327      tmp = gen_rtx_SET (VOIDmode,
20328                         stack_pointer_rtx,
20329                         plus_constant (Pmode,
20330                                        stack_pointer_rtx,
20331                                        4 * num_regs));
20332      RTX_FRAME_RELATED_P (tmp) = 1;
20333      XVECEXP (par, 0, offset_adj) = tmp;
20334    }
20335
20336  /* Now restore every reg, which may include PC.  */
20337  for (j = 0, i = 0; j < num_regs; i++)
20338    if (saved_regs_mask & (1 << i))
20339      {
20340        reg = gen_rtx_REG (SImode, i);
20341        if ((num_regs == 1) && emit_update && !return_in_pc)
20342          {
20343            /* Emit single load with writeback.  */
20344            tmp = gen_frame_mem (SImode,
20345                                 gen_rtx_POST_INC (Pmode,
20346                                                   stack_pointer_rtx));
20347            tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20348            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20349            return;
20350          }
20351
20352        tmp = gen_rtx_SET (VOIDmode,
20353                           reg,
20354                           gen_frame_mem
20355                           (SImode,
20356                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20357        RTX_FRAME_RELATED_P (tmp) = 1;
20358        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20359
20360        /* We need to maintain a sequence for DWARF info too.  As dwarf info
20361           should not have PC, skip PC.  */
20362        if (i != PC_REGNUM)
20363          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20364
20365        j++;
20366      }
20367
20368  if (return_in_pc)
20369    par = emit_jump_insn (par);
20370  else
20371    par = emit_insn (par);
20372
20373  REG_NOTES (par) = dwarf;
20374  if (!return_in_pc)
20375    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20376				 stack_pointer_rtx, stack_pointer_rtx);
20377}
20378
20379/* Generate and emit an insn pattern that we will recognize as a pop_multi
20380   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20381
20382   Unfortunately, since this insn does not reflect very well the actual
20383   semantics of the operation, we need to annotate the insn for the benefit
20384   of DWARF2 frame unwind information.  */
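/* Illustrative example: restoring four D registers starting at d8, with
   BASE_REG being the stack pointer, corresponds roughly to
   "vldm sp!, {d8-d11}": the PARALLEL holds the base-register increment by
   32 (8 * 4) plus the four DFmode loads, and a REG_CFA_RESTORE note is
   attached for each of d8..d11.  */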
20385static void
20386arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20387{
20388  int i, j;
20389  rtx par;
20390  rtx dwarf = NULL_RTX;
20391  rtx tmp, reg;
20392
20393  gcc_assert (num_regs && num_regs <= 32);
20394
  /* Work around the ARM10 VFPr1 bug.  */
20396  if (num_regs == 2 && !arm_arch6)
20397    {
20398      if (first_reg == 15)
20399        first_reg--;
20400
20401      num_regs++;
20402    }
20403
20404  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20405     there could be up to 32 D-registers to restore.
20406     If there are more than 16 D-registers, make two recursive calls,
20407     each of which emits one pop_multi instruction.  */
20408  if (num_regs > 16)
20409    {
20410      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20411      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20412      return;
20413    }
20414
20415  /* The parallel needs to hold num_regs SETs
20416     and one SET for the stack update.  */
20417  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20418
20419  /* Increment the stack pointer, based on there being
20420     num_regs 8-byte registers to restore.  */
20421  tmp = gen_rtx_SET (VOIDmode,
20422                     base_reg,
20423                     plus_constant (Pmode, base_reg, 8 * num_regs));
20424  RTX_FRAME_RELATED_P (tmp) = 1;
20425  XVECEXP (par, 0, 0) = tmp;
20426
20427  /* Now show every reg that will be restored, using a SET for each.  */
20428  for (j = 0, i=first_reg; j < num_regs; i += 2)
20429    {
20430      reg = gen_rtx_REG (DFmode, i);
20431
20432      tmp = gen_rtx_SET (VOIDmode,
20433                         reg,
20434                         gen_frame_mem
20435                         (DFmode,
20436                          plus_constant (Pmode, base_reg, 8 * j)));
20437      RTX_FRAME_RELATED_P (tmp) = 1;
20438      XVECEXP (par, 0, j + 1) = tmp;
20439
20440      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20441
20442      j++;
20443    }
20444
20445  par = emit_insn (par);
20446  REG_NOTES (par) = dwarf;
20447
  /* Make sure the CFA is not left based on IP_REGNUM, so that unwinding from FP
     remains possible.  */
20449  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20450    {
20451      RTX_FRAME_RELATED_P (par) = 1;
20452      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20453    }
20454  else
20455    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20456				 base_reg, base_reg);
20457}
20458
/* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
   an even number of registers is being popped, multiple LDRD patterns are
   created for all the register pairs.  If an odd number of registers is
   popped, the last register is loaded using an LDR pattern.  */
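/* A worked example (illustrative only): SAVED_REGS_MASK == {r4, r5, r6, r7, pc}
   gives num_regs == 5, reduced to 4 because of PC.  The pair loop then emits
   LDRD patterns for (r4, r5) at [sp] and (r6, r7) at [sp, #8], the stack
   pointer is incremented by 16, and finally PC is loaded by a single LDR with
   post-increment that also performs the return.  */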
20463static void
20464thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20465{
20466  int num_regs = 0;
20467  int i, j;
20468  rtx par = NULL_RTX;
20469  rtx dwarf = NULL_RTX;
20470  rtx tmp, reg, tmp1;
20471  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20472
20473  for (i = 0; i <= LAST_ARM_REGNUM; i++)
20474    if (saved_regs_mask & (1 << i))
20475      num_regs++;
20476
20477  gcc_assert (num_regs && num_regs <= 16);
20478
  /* We cannot generate an LDRD for PC, so reduce the count if PC is to be
     popped.  If num_regs was even it now becomes odd, and the remaining
     register together with PC is popped by a multi-register pop; if it was
     odd it becomes even, and PC is loaded by an LDR that also performs the
     return.  */
20483  if (return_in_pc)
20484    num_regs--;
20485
20486  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20487
  /* Var J iterates over the register numbers to find the registers in
     SAVED_REGS_MASK, while var I gives the index of a saved register within
     the stack frame.  A PARALLEL RTX holding a register pair is created here
     so that the LDRD pattern can be matched.  As PC is always the last
     register to be popped, and num_regs has already been decremented if PC
     is present, we do not have to worry about PC in this loop.  */
20494  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20495    if (saved_regs_mask & (1 << j))
20496      {
20497        /* Create RTX for memory load.  */
20498        reg = gen_rtx_REG (SImode, j);
20499        tmp = gen_rtx_SET (SImode,
20500                           reg,
20501                           gen_frame_mem (SImode,
20502                               plus_constant (Pmode,
20503                                              stack_pointer_rtx, 4 * i)));
20504        RTX_FRAME_RELATED_P (tmp) = 1;
20505
20506        if (i % 2 == 0)
20507          {
            /* When the saved-register index (i) is even, the RTX to be
               emitted has not yet been created, so create it first.  The
               LDRD pattern we are generating is:
               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where the target registers need not be consecutive.  */
20514            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20515            dwarf = NULL_RTX;
20516          }
20517
        /* The i-th register is added to the PARALLEL RTX: if I is even,
           reg_i is added as the 0th element, and if I is odd it is added as
           the 1st element of the LDRD pattern shown above.  */
20521        XVECEXP (par, 0, (i % 2)) = tmp;
20522        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20523
20524        if ((i % 2) == 1)
20525          {
            /* When the saved-register index (i) is odd, the RTXs for both
               registers of the pair have been filled into the LDRD pattern
               above, so the pattern can be emitted now.  */
20529            par = emit_insn (par);
20530            REG_NOTES (par) = dwarf;
20531	    RTX_FRAME_RELATED_P (par) = 1;
20532          }
20533
20534        i++;
20535      }
20536
  /* If the number of registers pushed is odd and return_in_pc is false, or
     the number of registers is even and return_in_pc is true, the last
     register is popped using an LDR; it may be PC.  Hence, adjust the stack
     first and then use an LDR with post-increment.  */
20541
20542  /* Increment the stack pointer, based on there being
20543     num_regs 4-byte registers to restore.  */
20544  tmp = gen_rtx_SET (VOIDmode,
20545                     stack_pointer_rtx,
20546                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20547  RTX_FRAME_RELATED_P (tmp) = 1;
20548  tmp = emit_insn (tmp);
20549  if (!return_in_pc)
20550    {
20551      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20552				   stack_pointer_rtx, stack_pointer_rtx);
20553    }
20554
20555  dwarf = NULL_RTX;
20556
20557  if (((num_regs % 2) == 1 && !return_in_pc)
20558      || ((num_regs % 2) == 0 && return_in_pc))
20559    {
20560      /* Scan for the single register to be popped.  Skip until the saved
20561         register is found.  */
20562      for (; (saved_regs_mask & (1 << j)) == 0; j++);
20563
20564      /* Gen LDR with post increment here.  */
20565      tmp1 = gen_rtx_MEM (SImode,
20566                          gen_rtx_POST_INC (SImode,
20567                                            stack_pointer_rtx));
20568      set_mem_alias_set (tmp1, get_frame_alias_set ());
20569
20570      reg = gen_rtx_REG (SImode, j);
20571      tmp = gen_rtx_SET (SImode, reg, tmp1);
20572      RTX_FRAME_RELATED_P (tmp) = 1;
20573      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20574
20575      if (return_in_pc)
20576        {
20577          /* If return_in_pc, j must be PC_REGNUM.  */
20578          gcc_assert (j == PC_REGNUM);
20579          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20580          XVECEXP (par, 0, 0) = ret_rtx;
20581          XVECEXP (par, 0, 1) = tmp;
20582          par = emit_jump_insn (par);
20583        }
20584      else
20585        {
20586          par = emit_insn (tmp);
20587	  REG_NOTES (par) = dwarf;
20588	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20589				       stack_pointer_rtx, stack_pointer_rtx);
20590        }
20591
20592    }
20593  else if ((num_regs % 2) == 1 && return_in_pc)
20594    {
      /* Two registers (the last remaining one and PC) are left to be popped,
         so generate the pop_multiple_with_stack_update_and_return pattern to
         pop them, returning through PC.  */
20597      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20598    }
20599
20600  return;
20601}
20602
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns by loading into PC directly (i.e., if PC
   is in SAVED_REGS_MASK), the stack needs to be updated before the last
   load.  TODO: Add a peephole optimization to recognize the new epilogue
   sequence as an LDM instruction whenever possible.  TODO: Add a peephole
   optimization to merge the load at stack-offset zero with the stack update
   instruction, using a load with writeback in post-index addressing mode.  */
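/* A worked example (illustrative only): SAVED_REGS_MASK == {r4, r5, r6, r7}
   results in "ldrd r4, r5, [sp]", "ldrd r6, r7, [sp, #8]" and then a single
   "add sp, sp, #16", with each load carrying REG_CFA_RESTORE notes and the
   final add carrying the REG_CFA_ADJUST_CFA note.  */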
20614static void
20615arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20616{
20617  int j = 0;
20618  int offset = 0;
20619  rtx par = NULL_RTX;
20620  rtx dwarf = NULL_RTX;
20621  rtx tmp, mem;
20622
20623  /* Restore saved registers.  */
20624  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20625  j = 0;
20626  while (j <= LAST_ARM_REGNUM)
20627    if (saved_regs_mask & (1 << j))
20628      {
20629        if ((j % 2) == 0
20630            && (saved_regs_mask & (1 << (j + 1)))
20631            && (j + 1) != PC_REGNUM)
20632          {
20633            /* Current register and next register form register pair for which
20634               LDRD can be generated. PC is always the last register popped, and
20635               we handle it separately.  */
20636            if (offset > 0)
20637              mem = gen_frame_mem (DImode,
20638                                   plus_constant (Pmode,
20639                                                  stack_pointer_rtx,
20640                                                  offset));
20641            else
20642              mem = gen_frame_mem (DImode, stack_pointer_rtx);
20643
20644            tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20645            tmp = emit_insn (tmp);
20646	    RTX_FRAME_RELATED_P (tmp) = 1;
20647
20648            /* Generate dwarf info.  */
20649
20650            dwarf = alloc_reg_note (REG_CFA_RESTORE,
20651                                    gen_rtx_REG (SImode, j),
20652                                    NULL_RTX);
20653            dwarf = alloc_reg_note (REG_CFA_RESTORE,
20654                                    gen_rtx_REG (SImode, j + 1),
20655                                    dwarf);
20656
20657            REG_NOTES (tmp) = dwarf;
20658
20659            offset += 8;
20660            j += 2;
20661          }
20662        else if (j != PC_REGNUM)
20663          {
20664            /* Emit a single word load.  */
20665            if (offset > 0)
20666              mem = gen_frame_mem (SImode,
20667                                   plus_constant (Pmode,
20668                                                  stack_pointer_rtx,
20669                                                  offset));
20670            else
20671              mem = gen_frame_mem (SImode, stack_pointer_rtx);
20672
20673            tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20674            tmp = emit_insn (tmp);
20675	    RTX_FRAME_RELATED_P (tmp) = 1;
20676
20677            /* Generate dwarf info.  */
20678            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20679                                              gen_rtx_REG (SImode, j),
20680                                              NULL_RTX);
20681
20682            offset += 4;
20683            j += 1;
20684          }
20685        else /* j == PC_REGNUM */
20686          j++;
20687      }
20688    else
20689      j++;
20690
20691  /* Update the stack.  */
20692  if (offset > 0)
20693    {
20694      tmp = gen_rtx_SET (Pmode,
20695                         stack_pointer_rtx,
20696                         plus_constant (Pmode,
20697                                        stack_pointer_rtx,
20698                                        offset));
20699      tmp = emit_insn (tmp);
20700      arm_add_cfa_adjust_cfa_note (tmp, offset,
20701				   stack_pointer_rtx, stack_pointer_rtx);
20702      offset = 0;
20703    }
20704
20705  if (saved_regs_mask & (1 << PC_REGNUM))
20706    {
20707      /* Only PC is to be popped.  */
20708      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20709      XVECEXP (par, 0, 0) = ret_rtx;
20710      tmp = gen_rtx_SET (SImode,
20711                         gen_rtx_REG (SImode, PC_REGNUM),
20712                         gen_frame_mem (SImode,
20713                                        gen_rtx_POST_INC (SImode,
20714                                                          stack_pointer_rtx)));
20715      RTX_FRAME_RELATED_P (tmp) = 1;
20716      XVECEXP (par, 0, 1) = tmp;
20717      par = emit_jump_insn (par);
20718
20719      /* Generate dwarf info.  */
20720      dwarf = alloc_reg_note (REG_CFA_RESTORE,
20721                              gen_rtx_REG (SImode, PC_REGNUM),
20722                              NULL_RTX);
20723      REG_NOTES (par) = dwarf;
20724      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20725				   stack_pointer_rtx, stack_pointer_rtx);
20726    }
20727}
20728
20729/* Calculate the size of the return value that is passed in registers.  */
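/* For example, a function returning "int" yields 4 here, and one returning
   "long long" (held in r0/r1 on return) yields 8.  The result is used below
   when deciding whether r3 can safely be used as stack-alignment padding.  */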
20730static unsigned
20731arm_size_return_regs (void)
20732{
20733  machine_mode mode;
20734
20735  if (crtl->return_rtx != 0)
20736    mode = GET_MODE (crtl->return_rtx);
20737  else
20738    mode = DECL_MODE (DECL_RESULT (current_function_decl));
20739
20740  return GET_MODE_SIZE (mode);
20741}
20742
20743/* Return true if the current function needs to save/restore LR.  */
20744static bool
20745thumb_force_lr_save (void)
20746{
20747  return !cfun->machine->lr_save_eliminated
20748	 && (!leaf_function_p ()
20749	     || thumb_far_jump_used_p ()
20750	     || df_regs_ever_live_p (LR_REGNUM));
20751}
20752
/* Return true if CALL is an indirect tail call.  In that case we cannot
   tell whether r3 will be available, since the call target itself is held
   in a register.  */
20756static bool
20757is_indirect_tailcall_p (rtx call)
20758{
20759  rtx pat = PATTERN (call);
20760
20761  /* Indirect tail call.  */
20762  pat = XVECEXP (pat, 0, 0);
20763  if (GET_CODE (pat) == SET)
20764    pat = SET_SRC (pat);
20765
20766  pat = XEXP (XEXP (pat, 0), 0);
20767  return REG_P (pat);
20768}
20769
20770/* Return true if r3 is used by any of the tail call insns in the
20771   current function.  */
20772static bool
20773any_sibcall_could_use_r3 (void)
20774{
20775  edge_iterator ei;
20776  edge e;
20777
20778  if (!crtl->tail_call_emit)
20779    return false;
20780  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20781    if (e->flags & EDGE_SIBCALL)
20782      {
20783	rtx call = BB_END (e->src);
20784	if (!CALL_P (call))
20785	  call = prev_nonnote_nondebug_insn (call);
20786	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20787	if (find_regno_fusage (call, USE, 3)
20788	    || is_indirect_tailcall_p (call))
20789	  return true;
20790      }
20791  return false;
20792}
20793
20794
20795/* Compute the distance from register FROM to register TO.
20796   These can be the arg pointer (26), the soft frame pointer (25),
20797   the stack pointer (13) or the hard frame pointer (11).
20798   In thumb mode r7 is used as the soft frame pointer, if needed.
20799   Typical stack layout looks like this:
20800
20801       old stack pointer -> |    |
20802                             ----
20803                            |    | \
20804                            |    |   saved arguments for
20805                            |    |   vararg functions
20806			    |    | /
20807                              --
20808   hard FP & arg pointer -> |    | \
20809                            |    |   stack
20810                            |    |   frame
20811                            |    | /
20812                              --
20813                            |    | \
20814                            |    |   call saved
20815                            |    |   registers
20816      soft frame pointer -> |    | /
20817                              --
20818                            |    | \
20819                            |    |   local
20820                            |    |   variables
20821     locals base pointer -> |    | /
20822                              --
20823                            |    | \
20824                            |    |   outgoing
20825                            |    |   arguments
20826   current stack pointer -> |    | /
20827                              --
20828
20829  For a given function some or all of these stack components
20830  may not be needed, giving rise to the possibility of
20831  eliminating some of the registers.
20832
20833  The values returned by this function must reflect the behavior
20834  of arm_expand_prologue() and arm_compute_save_reg_mask().
20835
20836  The sign of the number returned reflects the direction of stack
20837  growth, so the values are positive for all eliminations except
20838  from the soft frame pointer to the hard frame pointer.
20839
20840  SFP may point just inside the local variables block to ensure correct
20841  alignment.  */
20842
20843
20844/* Calculate stack offsets.  These are used to calculate register elimination
20845   offsets and in prologue/epilogue code.  Also calculates which registers
20846   should be saved.  */
20847
20848static arm_stack_offsets *
20849arm_get_frame_offsets (void)
20850{
20851  struct arm_stack_offsets *offsets;
20852  unsigned long func_type;
20853  int leaf;
20854  int saved;
20855  int core_saved;
20856  HOST_WIDE_INT frame_size;
20857  int i;
20858
20859  offsets = &cfun->machine->stack_offsets;
20860
  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  This
     works because we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */
20870
20871  if (reload_completed)
20872    return offsets;
20873
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of the
     preceding data.  */
20876  frame_size = ROUND_UP_WORD (get_frame_size ());
20877
20878  leaf = leaf_function_p ();
20879
20880  /* Space for variadic functions.  */
20881  offsets->saved_args = crtl->args.pretend_args_size;
20882
20883  /* In Thumb mode this is incorrect, but never used.  */
20884  offsets->frame
20885    = (offsets->saved_args
20886       + arm_compute_static_chain_stack_bytes ()
20887       + (frame_pointer_needed ? 4 : 0));
20888
20889  if (TARGET_32BIT)
20890    {
20891      unsigned int regno;
20892
20893      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20894      core_saved = bit_count (offsets->saved_regs_mask) * 4;
20895      saved = core_saved;
20896
20897      /* We know that SP will be doubleword aligned on entry, and we must
20898	 preserve that condition at any subroutine call.  We also require the
20899	 soft frame pointer to be doubleword aligned.  */
20900
20901      if (TARGET_REALLY_IWMMXT)
20902	{
20903	  /* Check for the call-saved iWMMXt registers.  */
20904	  for (regno = FIRST_IWMMXT_REGNUM;
20905	       regno <= LAST_IWMMXT_REGNUM;
20906	       regno++)
20907	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20908	      saved += 8;
20909	}
20910
20911      func_type = arm_current_func_type ();
20912      /* Space for saved VFP registers.  */
20913      if (! IS_VOLATILE (func_type)
20914	  && TARGET_HARD_FLOAT && TARGET_VFP)
20915	saved += arm_get_vfp_saved_size ();
20916    }
20917  else /* TARGET_THUMB1 */
20918    {
20919      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20920      core_saved = bit_count (offsets->saved_regs_mask) * 4;
20921      saved = core_saved;
20922      if (TARGET_BACKTRACE)
20923	saved += 16;
20924    }
20925
20926  /* Saved registers include the stack frame.  */
20927  offsets->saved_regs
20928    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20929  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20930
20931  /* A leaf function does not need any stack alignment if it has nothing
20932     on the stack.  */
20933  if (leaf && frame_size == 0
20934      /* However if it calls alloca(), we have a dynamically allocated
20935	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
20936      && ! cfun->calls_alloca)
20937    {
20938      offsets->outgoing_args = offsets->soft_frame;
20939      offsets->locals_base = offsets->soft_frame;
20940      return offsets;
20941    }
20942
20943  /* Ensure SFP has the correct alignment.  */
20944  if (ARM_DOUBLEWORD_ALIGN
20945      && (offsets->soft_frame & 7))
20946    {
20947      offsets->soft_frame += 4;
20948      /* Try to align stack by pushing an extra reg.  Don't bother doing this
20949         when there is a stack frame as the alignment will be rolled into
20950	 the normal stack adjustment.  */
20951      if (frame_size + crtl->outgoing_args_size == 0)
20952	{
20953	  int reg = -1;
20954
20955	  /* Register r3 is caller-saved.  Normally it does not need to be
20956	     saved on entry by the prologue.  However if we choose to save
20957	     it for padding then we may confuse the compiler into thinking
20958	     a prologue sequence is required when in fact it is not.  This
20959	     will occur when shrink-wrapping if r3 is used as a scratch
20960	     register and there are no other callee-saved writes.
20961
	     This situation can be avoided when other callee-saved registers
	     are available: r3 is not mandatory, so we can choose a
	     callee-saved register for the padding instead.  */
20965	  bool prefer_callee_reg_p = false;
20966
20967	  /* If it is safe to use r3, then do so.  This sometimes
20968	     generates better code on Thumb-2 by avoiding the need to
20969	     use 32-bit push/pop instructions.  */
20970          if (! any_sibcall_could_use_r3 ()
20971	      && arm_size_return_regs () <= 12
20972	      && (offsets->saved_regs_mask & (1 << 3)) == 0
20973	      && (TARGET_THUMB2
20974		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20975	    {
20976	      reg = 3;
20977	      if (!TARGET_THUMB2)
20978		prefer_callee_reg_p = true;
20979	    }
20980	  if (reg == -1
20981	      || prefer_callee_reg_p)
20982	    {
20983	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20984		{
20985		  /* Avoid fixed registers; they may be changed at
20986		     arbitrary times so it's unsafe to restore them
20987		     during the epilogue.  */
20988		  if (!fixed_regs[i]
20989		      && (offsets->saved_regs_mask & (1 << i)) == 0)
20990		    {
20991		      reg = i;
20992		      break;
20993		    }
20994		}
20995	    }
20996
20997	  if (reg != -1)
20998	    {
20999	      offsets->saved_regs += 4;
21000	      offsets->saved_regs_mask |= (1 << reg);
21001	    }
21002	}
21003    }
21004
21005  offsets->locals_base = offsets->soft_frame + frame_size;
21006  offsets->outgoing_args = (offsets->locals_base
21007			    + crtl->outgoing_args_size);
21008
21009  if (ARM_DOUBLEWORD_ALIGN)
21010    {
21011      /* Ensure SP remains doubleword aligned.  */
21012      if (offsets->outgoing_args & 7)
21013	offsets->outgoing_args += 4;
21014      gcc_assert (!(offsets->outgoing_args & 7));
21015    }
21016
21017  return offsets;
21018}
21019
21020
21021/* Calculate the relative offsets for the different stack pointers.  Positive
21022   offsets are in the direction of stack growth.  */
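/* As an illustrative example (assuming no static chain slot and a zero
   CALLER_INTERWORKING_SLOT_SIZE): with no pretend args, 24 bytes of saved
   core registers, 8 bytes of locals and no outgoing arguments we get
   saved_args == 0, saved_regs == soft_frame == 24 and outgoing_args == 32,
   so eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM returns
   32 - (0 + 4) == 28, and FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM
   returns 32 - 24 == 8.  */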
21023
21024HOST_WIDE_INT
21025arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21026{
21027  arm_stack_offsets *offsets;
21028
21029  offsets = arm_get_frame_offsets ();
21030
21031  /* OK, now we have enough information to compute the distances.
21032     There must be an entry in these switch tables for each pair
21033     of registers in ELIMINABLE_REGS, even if some of the entries
21034     seem to be redundant or useless.  */
21035  switch (from)
21036    {
21037    case ARG_POINTER_REGNUM:
21038      switch (to)
21039	{
21040	case THUMB_HARD_FRAME_POINTER_REGNUM:
21041	  return 0;
21042
21043	case FRAME_POINTER_REGNUM:
21044	  /* This is the reverse of the soft frame pointer
21045	     to hard frame pointer elimination below.  */
21046	  return offsets->soft_frame - offsets->saved_args;
21047
21048	case ARM_HARD_FRAME_POINTER_REGNUM:
21049	  /* This is only non-zero in the case where the static chain register
21050	     is stored above the frame.  */
21051	  return offsets->frame - offsets->saved_args - 4;
21052
21053	case STACK_POINTER_REGNUM:
21054	  /* If nothing has been pushed on the stack at all
21055	     then this will return -4.  This *is* correct!  */
21056	  return offsets->outgoing_args - (offsets->saved_args + 4);
21057
21058	default:
21059	  gcc_unreachable ();
21060	}
21061      gcc_unreachable ();
21062
21063    case FRAME_POINTER_REGNUM:
21064      switch (to)
21065	{
21066	case THUMB_HARD_FRAME_POINTER_REGNUM:
21067	  return 0;
21068
21069	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer points to the bottom
	     entry in the stack frame.  If there is no stack frame at
	     all, then they are identical.  */
21074
21075	  return offsets->frame - offsets->soft_frame;
21076
21077	case STACK_POINTER_REGNUM:
21078	  return offsets->outgoing_args - offsets->soft_frame;
21079
21080	default:
21081	  gcc_unreachable ();
21082	}
21083      gcc_unreachable ();
21084
21085    default:
21086      /* You cannot eliminate from the stack pointer.
21087	 In theory you could eliminate from the hard frame
21088	 pointer to the stack pointer, but this will never
21089	 happen, since if a stack frame is not needed the
21090	 hard frame pointer will never be used.  */
21091      gcc_unreachable ();
21092    }
21093}
21094
21095/* Given FROM and TO register numbers, say whether this elimination is
21096   allowed.  Frame pointer elimination is automatically handled.
21097
21098   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
21099   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
21100   pointer, we must eliminate FRAME_POINTER_REGNUM into
21101   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21102   ARG_POINTER_REGNUM.  */
21103
21104bool
21105arm_can_eliminate (const int from, const int to)
21106{
21107  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21108          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21109          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21110          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21111           true);
21112}
21113
21114/* Emit RTL to save coprocessor registers on function entry.  Returns the
21115   number of bytes pushed.  */
21116
21117static int
21118arm_save_coproc_regs(void)
21119{
21120  int saved_size = 0;
21121  unsigned reg;
21122  unsigned start_reg;
21123  rtx insn;
21124
21125  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21126    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21127      {
21128	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21129	insn = gen_rtx_MEM (V2SImode, insn);
21130	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21131	RTX_FRAME_RELATED_P (insn) = 1;
21132	saved_size += 8;
21133      }
21134
21135  if (TARGET_HARD_FLOAT && TARGET_VFP)
21136    {
21137      start_reg = FIRST_VFP_REGNUM;
21138
21139      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21140	{
21141	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21142	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21143	    {
21144	      if (start_reg != reg)
21145		saved_size += vfp_emit_fstmd (start_reg,
21146					      (reg - start_reg) / 2);
21147	      start_reg = reg + 2;
21148	    }
21149	}
21150      if (start_reg != reg)
21151	saved_size += vfp_emit_fstmd (start_reg,
21152				      (reg - start_reg) / 2);
21153    }
21154  return saved_size;
21155}
21156
21157
21158/* Set the Thumb frame pointer from the stack pointer.  */
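/* For instance (roughly): if the distance from the final stack pointer to
   the locals base is below 1024 bytes, a single "add fp, sp, #amount" is
   emitted; for a larger amount such as 1032, Thumb-2 gets something like
   "mov fp, #1032" followed by "add fp, sp, fp", together with a
   REG_FRAME_RELATED_EXPR note describing fp = sp + 1032.  */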
21159
21160static void
21161thumb_set_frame_pointer (arm_stack_offsets *offsets)
21162{
21163  HOST_WIDE_INT amount;
21164  rtx insn, dwarf;
21165
21166  amount = offsets->outgoing_args - offsets->locals_base;
21167  if (amount < 1024)
21168    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21169				  stack_pointer_rtx, GEN_INT (amount)));
21170  else
21171    {
21172      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21173      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
21174         expects the first two operands to be the same.  */
21175      if (TARGET_THUMB2)
21176	{
21177	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21178					stack_pointer_rtx,
21179					hard_frame_pointer_rtx));
21180	}
21181      else
21182	{
21183	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21184					hard_frame_pointer_rtx,
21185					stack_pointer_rtx));
21186	}
21187      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21188			   plus_constant (Pmode, stack_pointer_rtx, amount));
21189      RTX_FRAME_RELATED_P (dwarf) = 1;
21190      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21191    }
21192
21193  RTX_FRAME_RELATED_P (insn) = 1;
21194}
21195
21196/* Generate the prologue instructions for entry into an ARM or Thumb-2
21197   function.  */
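/* A minimal illustration of the code below (not covering the APCS, nested,
   interrupt or stack-realignment cases, and assuming the multi-register push
   path is used): a function that saves {r4, lr} and needs 8 bytes of locals
   gets, roughly, "push {r4, lr}" followed by "sub sp, sp, #8", with both
   instructions marked RTX_FRAME_RELATED_P for the unwinder.  */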
21198void
21199arm_expand_prologue (void)
21200{
21201  rtx amount;
21202  rtx insn;
21203  rtx ip_rtx;
21204  unsigned long live_regs_mask;
21205  unsigned long func_type;
21206  int fp_offset = 0;
21207  int saved_pretend_args = 0;
21208  int saved_regs = 0;
21209  unsigned HOST_WIDE_INT args_to_push;
21210  arm_stack_offsets *offsets;
21211
21212  func_type = arm_current_func_type ();
21213
21214  /* Naked functions don't have prologues.  */
21215  if (IS_NAKED (func_type))
21216    {
21217      if (flag_stack_usage_info)
21218	current_function_static_stack_size = 0;
21219      return;
21220    }
21221
  /* Make a copy of crtl->args.pretend_args_size as we may need to modify it
     locally.  */
21223  args_to_push = crtl->args.pretend_args_size;
21224
  /* Compute which registers we will have to save onto the stack.  */
21226  offsets = arm_get_frame_offsets ();
21227  live_regs_mask = offsets->saved_regs_mask;
21228
21229  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21230
21231  if (IS_STACKALIGN (func_type))
21232    {
21233      rtx r0, r1;
21234
21235      /* Handle a word-aligned stack pointer.  We generate the following:
21236
21237	  mov r0, sp
21238	  bic r1, r0, #7
21239	  mov sp, r1
21240	  <save and restore r0 in normal prologue/epilogue>
21241	  mov sp, r0
21242	  bx lr
21243
21244	 The unwinder doesn't need to know about the stack realignment.
21245	 Just tell it we saved SP in r0.  */
21246      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21247
21248      r0 = gen_rtx_REG (SImode, R0_REGNUM);
21249      r1 = gen_rtx_REG (SImode, R1_REGNUM);
21250
21251      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21252      RTX_FRAME_RELATED_P (insn) = 1;
21253      add_reg_note (insn, REG_CFA_REGISTER, NULL);
21254
21255      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21256
21257      /* ??? The CFA changes here, which may cause GDB to conclude that it
21258	 has entered a different function.  That said, the unwind info is
21259	 correct, individually, before and after this instruction because
21260	 we've described the save of SP, which will override the default
21261	 handling of SP as restoring from the CFA.  */
21262      emit_insn (gen_movsi (stack_pointer_rtx, r1));
21263    }
21264
  /* For APCS frames, if the IP register is clobbered when creating the
     frame, save that register in a special way.  */
21268  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21269    {
21270      if (IS_INTERRUPT (func_type))
21271	{
21272	  /* Interrupt functions must not corrupt any registers.
21273	     Creating a frame pointer however, corrupts the IP
21274	     register, so we must push it first.  */
21275	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21276
21277	  /* Do not set RTX_FRAME_RELATED_P on this insn.
21278	     The dwarf stack unwinding code only wants to see one
21279	     stack decrement per function, and this is not it.  If
21280	     this instruction is labeled as being part of the frame
21281	     creation sequence then dwarf2out_frame_debug_expr will
21282	     die when it encounters the assignment of IP to FP
21283	     later on, since the use of SP here establishes SP as
21284	     the CFA register and not IP.
21285
21286	     Anyway this instruction is not really part of the stack
21287	     frame creation although it is part of the prologue.  */
21288	}
21289      else if (IS_NESTED (func_type))
21290	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
21295	     places in order:
21296
21297	       1. The last argument register r3 if it is available.
21298	       2. A slot on the stack above the frame if there are no
21299		  arguments to push onto the stack.
21300	       3. Register r3 again, after pushing the argument registers
21301	          onto the stack, if this is a varargs function.
21302	       4. The last slot on the stack created for the arguments to
21303		  push, if this isn't a varargs function.
21304
21305	     Note - we only need to tell the dwarf2 backend about the SP
21306	     adjustment in the second variant; the static chain register
21307	     doesn't need to be unwound, as it doesn't contain a value
21308	     inherited from the caller.  */
21309
21310	  if (!arm_r3_live_at_start_p ())
21311	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21312	  else if (args_to_push == 0)
21313	    {
21314	      rtx addr, dwarf;
21315
21316	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21317	      saved_regs += 4;
21318
21319	      addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21320	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21321	      fp_offset = 4;
21322
21323	      /* Just tell the dwarf backend that we adjusted SP.  */
21324	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21325				   plus_constant (Pmode, stack_pointer_rtx,
21326						  -fp_offset));
21327	      RTX_FRAME_RELATED_P (insn) = 1;
21328	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21329	    }
21330	  else
21331	    {
21332	      /* Store the args on the stack.  */
21333	      if (cfun->machine->uses_anonymous_args)
21334		{
21335		  insn
21336		    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21337					   (0xf0 >> (args_to_push / 4)) & 0xf);
21338		  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21339		  saved_pretend_args = 1;
21340		}
21341	      else
21342		{
21343		  rtx addr, dwarf;
21344
21345		  if (args_to_push == 4)
21346		    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21347		  else
21348		    addr
21349		      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21350					    plus_constant (Pmode,
21351							   stack_pointer_rtx,
21352							   -args_to_push));
21353
21354		  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21355
21356		  /* Just tell the dwarf backend that we adjusted SP.  */
21357		  dwarf
21358		    = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21359				   plus_constant (Pmode, stack_pointer_rtx,
21360						  -args_to_push));
21361		  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21362		}
21363
21364	      RTX_FRAME_RELATED_P (insn) = 1;
21365	      fp_offset = args_to_push;
21366	      args_to_push = 0;
21367	    }
21368	}
21369
21370      insn = emit_set_insn (ip_rtx,
21371			    plus_constant (Pmode, stack_pointer_rtx,
21372					   fp_offset));
21373      RTX_FRAME_RELATED_P (insn) = 1;
21374    }
21375
21376  if (args_to_push)
21377    {
21378      /* Push the argument registers, or reserve space for them.  */
21379      if (cfun->machine->uses_anonymous_args)
21380	insn = emit_multi_reg_push
21381	  ((0xf0 >> (args_to_push / 4)) & 0xf,
21382	   (0xf0 >> (args_to_push / 4)) & 0xf);
21383      else
21384	insn = emit_insn
21385	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21386		       GEN_INT (- args_to_push)));
21387      RTX_FRAME_RELATED_P (insn) = 1;
21388    }
21389
  /* If this is an interrupt service routine, the link register is going
     to be pushed, and we're not generating an extra push of IP (which is
     needed when a frame is needed and the frame layout is APCS), then
     subtracting four from LR now will mean that the function return can
     be done with a single instruction.  */
21395  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21396      && (live_regs_mask & (1 << LR_REGNUM)) != 0
21397      && !(frame_pointer_needed && TARGET_APCS_FRAME)
21398      && TARGET_ARM)
21399    {
21400      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21401
21402      emit_set_insn (lr, plus_constant (SImode, lr, -4));
21403    }
21404
21405  if (live_regs_mask)
21406    {
21407      unsigned long dwarf_regs_mask = live_regs_mask;
21408
21409      saved_regs += bit_count (live_regs_mask) * 4;
21410      if (optimize_size && !frame_pointer_needed
21411	  && saved_regs == offsets->saved_regs - offsets->saved_args)
21412	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so it is independent of the epilogue.  */
21417	  int n;
21418	  int frame;
21419	  n = 0;
21420	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21421	    n++;
21422	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21423	  if (frame && n * 4 >= frame)
21424	    {
21425	      n = frame / 4;
21426	      live_regs_mask |= (1 << n) - 1;
21427	      saved_regs += frame;
21428	    }
21429	}
21430
21431      if (TARGET_LDRD
21432	  && current_tune->prefer_ldrd_strd
21433          && !optimize_function_for_size_p (cfun))
21434        {
21435	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21436          if (TARGET_THUMB2)
21437	    thumb2_emit_strd_push (live_regs_mask);
21438          else if (TARGET_ARM
21439                   && !TARGET_APCS_FRAME
21440                   && !IS_INTERRUPT (func_type))
21441	    arm_emit_strd_push (live_regs_mask);
21442          else
21443            {
21444	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21445              RTX_FRAME_RELATED_P (insn) = 1;
21446            }
21447        }
21448      else
21449        {
21450	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21451          RTX_FRAME_RELATED_P (insn) = 1;
21452        }
21453    }
21454
21455  if (! IS_VOLATILE (func_type))
21456    saved_regs += arm_save_coproc_regs ();
21457
21458  if (frame_pointer_needed && TARGET_ARM)
21459    {
21460      /* Create the new frame pointer.  */
21461      if (TARGET_APCS_FRAME)
21462	{
21463	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
21464	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21465	  RTX_FRAME_RELATED_P (insn) = 1;
21466
21467	  if (IS_NESTED (func_type))
21468	    {
21469	      /* Recover the static chain register.  */
21470	      if (!arm_r3_live_at_start_p () || saved_pretend_args)
21471		insn = gen_rtx_REG (SImode, 3);
21472	      else
21473		{
21474		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21475		  insn = gen_frame_mem (SImode, insn);
21476		}
21477	      emit_set_insn (ip_rtx, insn);
21478	      /* Add a USE to stop propagate_one_insn() from barfing.  */
21479	      emit_insn (gen_force_register_use (ip_rtx));
21480	    }
21481	}
21482      else
21483	{
21484	  insn = GEN_INT (saved_regs - 4);
21485	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21486					stack_pointer_rtx, insn));
21487	  RTX_FRAME_RELATED_P (insn) = 1;
21488	}
21489    }
21490
21491  if (flag_stack_usage_info)
21492    current_function_static_stack_size
21493      = offsets->outgoing_args - offsets->saved_args;
21494
21495  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21496    {
21497      /* This add can produce multiple insns for a large constant, so we
21498	 need to get tricky.  */
21499      rtx_insn *last = get_last_insn ();
21500
21501      amount = GEN_INT (offsets->saved_args + saved_regs
21502			- offsets->outgoing_args);
21503
21504      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21505				    amount));
21506      do
21507	{
21508	  last = last ? NEXT_INSN (last) : get_insns ();
21509	  RTX_FRAME_RELATED_P (last) = 1;
21510	}
21511      while (last != insn);
21512
21513      /* If the frame pointer is needed, emit a special barrier that
21514	 will prevent the scheduler from moving stores to the frame
21515	 before the stack adjustment.  */
21516      if (frame_pointer_needed)
21517	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21518					 hard_frame_pointer_rtx));
21519    }
21520
21521
21522  if (frame_pointer_needed && TARGET_THUMB2)
21523    thumb_set_frame_pointer (offsets);
21524
21525  if (flag_pic && arm_pic_register != INVALID_REGNUM)
21526    {
21527      unsigned long mask;
21528
21529      mask = live_regs_mask;
21530      mask &= THUMB2_WORK_REGS;
21531      if (!IS_NESTED (func_type))
21532	mask |= (1 << IP_REGNUM);
21533      arm_load_pic_register (mask);
21534    }
21535
21536  /* If we are profiling, make sure no instructions are scheduled before
21537     the call to mcount.  Similarly if the user has requested no
21538     scheduling in the prolog.  Similarly if we want non-call exceptions
21539     using the EABI unwinder, to prevent faulting instructions from being
21540     swapped with a stack adjustment.  */
21541  if (crtl->profile || !TARGET_SCHED_PROLOG
21542      || (arm_except_unwind_info (&global_options) == UI_TARGET
21543	  && cfun->can_throw_non_call_exceptions))
21544    emit_insn (gen_blockage ());
21545
21546  /* If the link register is being kept alive, with the return address in it,
21547     then make sure that it does not get reused by the ce2 pass.  */
21548  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21549    cfun->machine->lr_save_eliminated = 1;
21550}
21551
21552/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
21553static void
21554arm_print_condition (FILE *stream)
21555{
21556  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21557    {
21558      /* Branch conversion is not implemented for Thumb-2.  */
21559      if (TARGET_THUMB)
21560	{
21561	  output_operand_lossage ("predicated Thumb instruction");
21562	  return;
21563	}
21564      if (current_insn_predicate != NULL)
21565	{
21566	  output_operand_lossage
21567	    ("predicated instruction in conditional sequence");
21568	  return;
21569	}
21570
21571      fputs (arm_condition_codes[arm_current_cc], stream);
21572    }
21573  else if (current_insn_predicate)
21574    {
21575      enum arm_cond_code code;
21576
21577      if (TARGET_THUMB1)
21578	{
21579	  output_operand_lossage ("predicated Thumb instruction");
21580	  return;
21581	}
21582
21583      code = get_arm_condition_code (current_insn_predicate);
21584      fputs (arm_condition_codes[code], stream);
21585    }
21586}
21587
21588
21589/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
21591   Lower case letters currently used: bcdefhimpqtvwxyz
21592   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21593   Letters previously used, but now deprecated/obsolete: sVWXYZ.
21594
21595   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21596
21597   If CODE is 'd', then the X is a condition operand and the instruction
21598   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
21600   should only be executed if the condition is false: however, if the mode
21601   of the comparison is CCFPEmode, then always execute the instruction -- we
21602   do this because in these circumstances !GE does not necessarily imply LT;
21603   in these cases the instruction pattern will take care to make sure that
21604   an instruction containing %d will follow, thereby undoing the effects of
21605   doing this instruction unconditionally.
21606   If CODE is 'N' then X is a floating point operand that must be negated
21607   before output.
21608   If CODE is 'B' then output a bitwise inverted value of X (a const int).
21609   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
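/* Some concrete examples of the codes handled below (illustrative only):
   for X == (const_int 5), %B prints -6 (the bitwise inverse, sign-extended);
   for a DImode value held in r4, %M prints "{r4-r5}"; and %L of
   (const_int 0x12345678) prints the low 16 bits, 22136 (0x5678).  */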
21610static void
21611arm_print_operand (FILE *stream, rtx x, int code)
21612{
21613  switch (code)
21614    {
21615    case '@':
21616      fputs (ASM_COMMENT_START, stream);
21617      return;
21618
21619    case '_':
21620      fputs (user_label_prefix, stream);
21621      return;
21622
21623    case '|':
21624      fputs (REGISTER_PREFIX, stream);
21625      return;
21626
21627    case '?':
21628      arm_print_condition (stream);
21629      return;
21630
21631    case '(':
21632      /* Nothing in unified syntax, otherwise the current condition code.  */
21633      if (!TARGET_UNIFIED_ASM)
21634	arm_print_condition (stream);
21635      break;
21636
21637    case ')':
21638      /* The current condition code in unified syntax, otherwise nothing.  */
21639      if (TARGET_UNIFIED_ASM)
21640	arm_print_condition (stream);
21641      break;
21642
21643    case '.':
21644      /* The current condition code for a condition code setting instruction.
21645	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
21646      if (TARGET_UNIFIED_ASM)
21647	{
21648	  fputc('s', stream);
21649	  arm_print_condition (stream);
21650	}
21651      else
21652	{
21653	  arm_print_condition (stream);
21654	  fputc('s', stream);
21655	}
21656      return;
21657
21658    case '!':
21659      /* If the instruction is conditionally executed then print
21660	 the current condition code, otherwise print 's'.  */
21661      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21662      if (current_insn_predicate)
21663	arm_print_condition (stream);
21664      else
21665	fputc('s', stream);
21666      break;
21667
21668    /* %# is a "break" sequence. It doesn't output anything, but is used to
21669       separate e.g. operand numbers from following text, if that text consists
21670       of further digits which we don't want to be part of the operand
21671       number.  */
21672    case '#':
21673      return;
21674
21675    case 'N':
21676      {
21677	REAL_VALUE_TYPE r;
21678	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21679	r = real_value_negate (&r);
21680	fprintf (stream, "%s", fp_const_from_val (&r));
21681      }
21682      return;
21683
21684    /* An integer or symbol address without a preceding # sign.  */
21685    case 'c':
21686      switch (GET_CODE (x))
21687	{
21688	case CONST_INT:
21689	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21690	  break;
21691
21692	case SYMBOL_REF:
21693	  output_addr_const (stream, x);
21694	  break;
21695
21696	case CONST:
21697	  if (GET_CODE (XEXP (x, 0)) == PLUS
21698	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21699	    {
21700	      output_addr_const (stream, x);
21701	      break;
21702	    }
21703	  /* Fall through.  */
21704
21705	default:
21706	  output_operand_lossage ("Unsupported operand for code '%c'", code);
21707	}
21708      return;
21709
21710    /* An integer that we want to print in HEX.  */
21711    case 'x':
21712      switch (GET_CODE (x))
21713	{
21714	case CONST_INT:
21715	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21716	  break;
21717
21718	default:
21719	  output_operand_lossage ("Unsupported operand for code '%c'", code);
21720	}
21721      return;
21722
21723    case 'B':
21724      if (CONST_INT_P (x))
21725	{
21726	  HOST_WIDE_INT val;
21727	  val = ARM_SIGN_EXTEND (~INTVAL (x));
21728	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21729	}
21730      else
21731	{
21732	  putc ('~', stream);
21733	  output_addr_const (stream, x);
21734	}
21735      return;
21736
21737    case 'b':
21738      /* Print the log2 of a CONST_INT.  */
21739      {
21740	HOST_WIDE_INT val;
21741
21742	if (!CONST_INT_P (x)
21743	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21744	  output_operand_lossage ("Unsupported operand for code '%c'", code);
21745	else
21746	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21747      }
21748      return;
21749
21750    case 'L':
21751      /* The low 16 bits of an immediate constant.  */
21752      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21753      return;
21754
21755    case 'i':
21756      fprintf (stream, "%s", arithmetic_instr (x, 1));
21757      return;
21758
21759    case 'I':
21760      fprintf (stream, "%s", arithmetic_instr (x, 0));
21761      return;
21762
21763    case 'S':
21764      {
21765	HOST_WIDE_INT val;
21766	const char *shift;
21767
21768	shift = shift_op (x, &val);
21769
21770	if (shift)
21771	  {
21772	    fprintf (stream, ", %s ", shift);
21773	    if (val == -1)
21774	      arm_print_operand (stream, XEXP (x, 1), 0);
21775	    else
21776	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21777	  }
21778      }
21779      return;
21780
21781      /* An explanation of the 'Q', 'R' and 'H' register operands:
21782
21783	 In a pair of registers containing a DI or DF value the 'Q'
21784	 operand returns the register number of the register containing
21785	 the least significant part of the value.  The 'R' operand returns
21786	 the register number of the register containing the most
21787	 significant part of the value.
21788
21789	 The 'H' operand returns the higher of the two register numbers.
21790	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21791	 same as the 'Q' operand, since the most significant part of the
21792	 value is held in the lower number register.  The reverse is true
21793	 on systems where WORDS_BIG_ENDIAN is false.
21794
21795	 The purpose of these operands is to distinguish between cases
21796	 where the endian-ness of the values is important (for example
21797	 when they are added together), and cases where the endian-ness
21798	 is irrelevant, but the order of register operations is important.
21799	 For example when loading a value from memory into a register
21800	 pair, the endian-ness does not matter.  Provided that the value
21801	 from the lower memory address is put into the lower numbered
21802	 register, and the value from the higher address is put into the
21803	 higher numbered register, the load will work regardless of whether
21804	 the value being loaded is big-wordian or little-wordian.  The
21805	 order of the two register loads can matter however, if the address
21806	 of the memory location is actually held in one of the registers
21807	 being overwritten by the load.
21808
21809	 The 'Q' and 'R' constraints are also available for 64-bit
21810	 constants.  */
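      /* For example, with a DImode value in {r2, r3} on a target where
	 WORDS_BIG_ENDIAN is false, 'Q' prints r2 while 'R' and 'H' both
	 print r3; when WORDS_BIG_ENDIAN is true, 'Q' and 'H' print r3
	 and 'R' prints r2.  */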
21811    case 'Q':
21812      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21813	{
21814	  rtx part = gen_lowpart (SImode, x);
21815	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21816	  return;
21817	}
21818
21819      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21820	{
21821	  output_operand_lossage ("invalid operand for code '%c'", code);
21822	  return;
21823	}
21824
21825      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21826      return;
21827
21828    case 'R':
21829      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21830	{
21831	  machine_mode mode = GET_MODE (x);
21832	  rtx part;
21833
21834	  if (mode == VOIDmode)
21835	    mode = DImode;
21836	  part = gen_highpart_mode (SImode, mode, x);
21837	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21838	  return;
21839	}
21840
21841      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21842	{
21843	  output_operand_lossage ("invalid operand for code '%c'", code);
21844	  return;
21845	}
21846
21847      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21848      return;
21849
21850    case 'H':
21851      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21852	{
21853	  output_operand_lossage ("invalid operand for code '%c'", code);
21854	  return;
21855	}
21856
21857      asm_fprintf (stream, "%r", REGNO (x) + 1);
21858      return;
21859
21860    case 'J':
21861      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21862	{
21863	  output_operand_lossage ("invalid operand for code '%c'", code);
21864	  return;
21865	}
21866
21867      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21868      return;
21869
21870    case 'K':
21871      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21872	{
21873	  output_operand_lossage ("invalid operand for code '%c'", code);
21874	  return;
21875	}
21876
21877      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21878      return;
21879
21880    case 'm':
21881      asm_fprintf (stream, "%r",
21882		   REG_P (XEXP (x, 0))
21883		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21884      return;
21885
21886    case 'M':
21887      asm_fprintf (stream, "{%r-%r}",
21888		   REGNO (x),
21889		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21890      return;
21891
21892    /* Like 'M', but writing doubleword vector registers, for use by Neon
21893       insns.  */
21894    case 'h':
21895      {
21896        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21897        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21898        if (numregs == 1)
21899          asm_fprintf (stream, "{d%d}", regno);
21900        else
21901          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21902      }
21903      return;
21904
21905    case 'd':
21906      /* CONST_TRUE_RTX means always -- that's the default.  */
21907      if (x == const_true_rtx)
21908	return;
21909
21910      if (!COMPARISON_P (x))
21911	{
21912	  output_operand_lossage ("invalid operand for code '%c'", code);
21913	  return;
21914	}
21915
21916      fputs (arm_condition_codes[get_arm_condition_code (x)],
21917	     stream);
21918      return;
21919
21920    case 'D':
21921      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
21922	 want to do that.  */
21923      if (x == const_true_rtx)
21924	{
21925	  output_operand_lossage ("instruction never executed");
21926	  return;
21927	}
21928      if (!COMPARISON_P (x))
21929	{
21930	  output_operand_lossage ("invalid operand for code '%c'", code);
21931	  return;
21932	}
21933
21934      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21935				 (get_arm_condition_code (x))],
21936	     stream);
21937      return;
21938
21939    case 's':
21940    case 'V':
21941    case 'W':
21942    case 'X':
21943    case 'Y':
21944    case 'Z':
21945      /* Former Maverick support, removed after GCC-4.7.  */
21946      output_operand_lossage ("obsolete Maverick format code '%c'", code);
21947      return;
21948
21949    case 'U':
21950      if (!REG_P (x)
21951	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21952	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21953	/* Bad value for wCG register number.  */
21954	{
21955	  output_operand_lossage ("invalid operand for code '%c'", code);
21956	  return;
21957	}
21958
21959      else
21960	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21961      return;
21962
21963      /* Print an iWMMXt control register name.  */
21964    case 'w':
21965      if (!CONST_INT_P (x)
21966	  || INTVAL (x) < 0
21967	  || INTVAL (x) >= 16)
21968	/* Bad value for wC register number.  */
21969	{
21970	  output_operand_lossage ("invalid operand for code '%c'", code);
21971	  return;
21972	}
21973
21974      else
21975	{
21976	  static const char * wc_reg_names [16] =
21977	    {
21978	      "wCID",  "wCon",  "wCSSF", "wCASF",
21979	      "wC4",   "wC5",   "wC6",   "wC7",
21980	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21981	      "wC12",  "wC13",  "wC14",  "wC15"
21982	    };
21983
21984	  fputs (wc_reg_names [INTVAL (x)], stream);
21985	}
21986      return;
21987
21988    /* Print the high single-precision register of a VFP double-precision
21989       register.  */
21990    case 'p':
21991      {
21992        machine_mode mode = GET_MODE (x);
21993        int regno;
21994
21995        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21996          {
21997	    output_operand_lossage ("invalid operand for code '%c'", code);
21998	    return;
21999          }
22000
22001        regno = REGNO (x);
22002        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22003          {
22004	    output_operand_lossage ("invalid operand for code '%c'", code);
22005	    return;
22006          }
22007
22008	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22009      }
22010      return;
22011
22012    /* Print a VFP/Neon double precision or quad precision register name.  */
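    /* For example, a double-precision value in s10/s11 is printed as d5,
       and a quad-precision Neon value starting at d4 is printed as q2.  */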
22013    case 'P':
22014    case 'q':
22015      {
22016	machine_mode mode = GET_MODE (x);
22017	int is_quad = (code == 'q');
22018	int regno;
22019
22020	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22021	  {
22022	    output_operand_lossage ("invalid operand for code '%c'", code);
22023	    return;
22024	  }
22025
22026	if (!REG_P (x)
22027	    || !IS_VFP_REGNUM (REGNO (x)))
22028	  {
22029	    output_operand_lossage ("invalid operand for code '%c'", code);
22030	    return;
22031	  }
22032
22033	regno = REGNO (x);
22034	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22035            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22036	  {
22037	    output_operand_lossage ("invalid operand for code '%c'", code);
22038	    return;
22039	  }
22040
22041	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22042	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22043      }
22044      return;
22045
22046    /* These two codes print the low/high doubleword register of a Neon quad
22047       register, respectively.  For pair-structure types, can also print
22048       low/high quadword registers.  */
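    /* For a quad register qN this prints d(2N) for 'e' and d(2N+1) for 'f';
       for a pair of quad registers it prints the low quad for 'e' and the
       high quad for 'f'.  */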
22049    case 'e':
22050    case 'f':
22051      {
22052        machine_mode mode = GET_MODE (x);
22053        int regno;
22054
22055        if ((GET_MODE_SIZE (mode) != 16
22056	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22057          {
22058	    output_operand_lossage ("invalid operand for code '%c'", code);
22059	    return;
22060          }
22061
22062        regno = REGNO (x);
22063        if (!NEON_REGNO_OK_FOR_QUAD (regno))
22064          {
22065	    output_operand_lossage ("invalid operand for code '%c'", code);
22066	    return;
22067          }
22068
22069        if (GET_MODE_SIZE (mode) == 16)
22070          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22071				  + (code == 'f' ? 1 : 0));
22072        else
22073          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22074				  + (code == 'f' ? 1 : 0));
22075      }
22076      return;
22077
22078    /* Print a VFPv3 floating-point constant, represented as an integer
22079       index.  */
22080    case 'G':
22081      {
22082        int index = vfp3_const_double_index (x);
22083	gcc_assert (index != -1);
22084	fprintf (stream, "%d", index);
22085      }
22086      return;
22087
22088    /* Print bits representing opcode features for Neon.
22089
22090       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
22091       and polynomials as unsigned.
22092
22093       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22094
22095       Bit 2 is 1 for rounding functions, 0 otherwise.  */
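    /* For example, a bits value of 3 (signed, float) prints 'f' for each
       of 'T', 'F' and 't', while 2 (unsigned, polynomial) prints 'p' for
       'T' and 'F' but 'u' for 't'.  */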
22096
22097    /* Identify the type as 's', 'u', 'p' or 'f'.  */
22098    case 'T':
22099      {
22100        HOST_WIDE_INT bits = INTVAL (x);
22101        fputc ("uspf"[bits & 3], stream);
22102      }
22103      return;
22104
22105    /* Likewise, but signed and unsigned integers are both 'i'.  */
22106    case 'F':
22107      {
22108        HOST_WIDE_INT bits = INTVAL (x);
22109        fputc ("iipf"[bits & 3], stream);
22110      }
22111      return;
22112
22113    /* As for 'T', but emit 'u' instead of 'p'.  */
22114    case 't':
22115      {
22116        HOST_WIDE_INT bits = INTVAL (x);
22117        fputc ("usuf"[bits & 3], stream);
22118      }
22119      return;
22120
22121    /* Bit 2: rounding (vs none).  */
22122    case 'O':
22123      {
22124        HOST_WIDE_INT bits = INTVAL (x);
22125        fputs ((bits & 4) != 0 ? "r" : "", stream);
22126      }
22127      return;
22128
22129    /* Memory operand for vld1/vst1 instruction.  */
22130    case 'A':
22131      {
22132	rtx addr;
22133	bool postinc = false;
22134	rtx postinc_reg = NULL;
22135	unsigned align, memsize, align_bits;
22136
22137	gcc_assert (MEM_P (x));
22138	addr = XEXP (x, 0);
22139	if (GET_CODE (addr) == POST_INC)
22140	  {
22141	    postinc = true;
22142	    addr = XEXP (addr, 0);
22143	  }
22144	if (GET_CODE (addr) == POST_MODIFY)
22145	  {
22146	    postinc_reg = XEXP (XEXP (addr, 1), 1);
22147	    addr = XEXP (addr, 0);
22148	  }
22149	asm_fprintf (stream, "[%r", REGNO (addr));
22150
22151	/* We know the alignment of this access, so we can emit a hint in the
22152	   instruction (for some alignments) as an aid to the memory subsystem
22153	   of the target.  */
22154	align = MEM_ALIGN (x) >> 3;
22155	memsize = MEM_SIZE (x);
22156
22157	/* Only certain alignment specifiers are supported by the hardware.  */
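	/* E.g. a 16-byte access known to be 16-byte aligned gets a ":128"
	   hint, giving an operand such as "[r0:128]".  */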
22158	if (memsize == 32 && (align % 32) == 0)
22159	  align_bits = 256;
22160	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22161	  align_bits = 128;
22162	else if (memsize >= 8 && (align % 8) == 0)
22163	  align_bits = 64;
22164	else
22165	  align_bits = 0;
22166
22167	if (align_bits != 0)
22168	  asm_fprintf (stream, ":%d", align_bits);
22169
22170	asm_fprintf (stream, "]");
22171
22172	if (postinc)
22173	  fputs("!", stream);
22174	if (postinc_reg)
22175	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22176      }
22177      return;
22178
22179    case 'C':
22180      {
22181	rtx addr;
22182
22183	gcc_assert (MEM_P (x));
22184	addr = XEXP (x, 0);
22185	gcc_assert (REG_P (addr));
22186	asm_fprintf (stream, "[%r]", REGNO (addr));
22187      }
22188      return;
22189
22190    /* Translate an S register number into a D register number and element index.  */
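    /* For example, s7 is printed as d3[1].  */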
22191    case 'y':
22192      {
22193        machine_mode mode = GET_MODE (x);
22194        int regno;
22195
22196        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22197          {
22198	    output_operand_lossage ("invalid operand for code '%c'", code);
22199	    return;
22200          }
22201
22202        regno = REGNO (x);
22203        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22204          {
22205	    output_operand_lossage ("invalid operand for code '%c'", code);
22206	    return;
22207          }
22208
22209	regno = regno - FIRST_VFP_REGNUM;
22210	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22211      }
22212      return;
22213
22214    case 'v':
22215	gcc_assert (CONST_DOUBLE_P (x));
22216	int result;
22217	result = vfp3_const_double_for_fract_bits (x);
22218	if (result == 0)
22219	  result = vfp3_const_double_for_bits (x);
22220	fprintf (stream, "#%d", result);
22221	return;
22222
22223    /* Register specifier for vld1.16/vst1.16.  Translate the S register
22224       number into a D register number and element index.  */
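    /* For example, s6 is printed as d3[0] and s7 as d3[2].  */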
22225    case 'z':
22226      {
22227        machine_mode mode = GET_MODE (x);
22228        int regno;
22229
22230        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22231          {
22232	    output_operand_lossage ("invalid operand for code '%c'", code);
22233	    return;
22234          }
22235
22236        regno = REGNO (x);
22237        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22238          {
22239	    output_operand_lossage ("invalid operand for code '%c'", code);
22240	    return;
22241          }
22242
22243	regno = regno - FIRST_VFP_REGNUM;
22244	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22245      }
22246      return;
22247
22248    default:
22249      if (x == 0)
22250	{
22251	  output_operand_lossage ("missing operand");
22252	  return;
22253	}
22254
22255      switch (GET_CODE (x))
22256	{
22257	case REG:
22258	  asm_fprintf (stream, "%r", REGNO (x));
22259	  break;
22260
22261	case MEM:
22262	  output_memory_reference_mode = GET_MODE (x);
22263	  output_address (XEXP (x, 0));
22264	  break;
22265
22266	case CONST_DOUBLE:
22267	  {
22268            char fpstr[20];
22269            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22270			      sizeof (fpstr), 0, 1);
22271            fprintf (stream, "#%s", fpstr);
22272	  }
22273	  break;
22274
22275	default:
22276	  gcc_assert (GET_CODE (x) != NEG);
22277	  fputc ('#', stream);
22278	  if (GET_CODE (x) == HIGH)
22279	    {
22280	      fputs (":lower16:", stream);
22281	      x = XEXP (x, 0);
22282	    }
22283
22284	  output_addr_const (stream, x);
22285	  break;
22286	}
22287    }
22288}
22289
22290/* Target hook for printing a memory address.  */
22291static void
22292arm_print_operand_address (FILE *stream, rtx x)
22293{
22294  if (TARGET_32BIT)
22295    {
22296      int is_minus = GET_CODE (x) == MINUS;
22297
22298      if (REG_P (x))
22299	asm_fprintf (stream, "[%r]", REGNO (x));
22300      else if (GET_CODE (x) == PLUS || is_minus)
22301	{
22302	  rtx base = XEXP (x, 0);
22303	  rtx index = XEXP (x, 1);
22304	  HOST_WIDE_INT offset = 0;
22305	  if (!REG_P (base)
22306	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
22307	    {
22308	      /* Ensure that BASE is a register
22309		 (one of them must be).  Also ensure that
22310		 SP is not used as an index register.  */
22311	      std::swap (base, index);
22312	    }
22313	  switch (GET_CODE (index))
22314	    {
22315	    case CONST_INT:
22316	      offset = INTVAL (index);
22317	      if (is_minus)
22318		offset = -offset;
22319	      asm_fprintf (stream, "[%r, #%wd]",
22320			   REGNO (base), offset);
22321	      break;
22322
22323	    case REG:
22324	      asm_fprintf (stream, "[%r, %s%r]",
22325			   REGNO (base), is_minus ? "-" : "",
22326			   REGNO (index));
22327	      break;
22328
22329	    case MULT:
22330	    case ASHIFTRT:
22331	    case LSHIFTRT:
22332	    case ASHIFT:
22333	    case ROTATERT:
22334	      {
22335		asm_fprintf (stream, "[%r, %s%r",
22336			     REGNO (base), is_minus ? "-" : "",
22337			     REGNO (XEXP (index, 0)));
22338		arm_print_operand (stream, index, 'S');
22339		fputs ("]", stream);
22340		break;
22341	      }
22342
22343	    default:
22344	      gcc_unreachable ();
22345	    }
22346	}
22347      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22348	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22349	{
22350	  extern machine_mode output_memory_reference_mode;
22351
22352	  gcc_assert (REG_P (XEXP (x, 0)));
22353
22354	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22355	    asm_fprintf (stream, "[%r, #%s%d]!",
22356			 REGNO (XEXP (x, 0)),
22357			 GET_CODE (x) == PRE_DEC ? "-" : "",
22358			 GET_MODE_SIZE (output_memory_reference_mode));
22359	  else
22360	    asm_fprintf (stream, "[%r], #%s%d",
22361			 REGNO (XEXP (x, 0)),
22362			 GET_CODE (x) == POST_DEC ? "-" : "",
22363			 GET_MODE_SIZE (output_memory_reference_mode));
22364	}
22365      else if (GET_CODE (x) == PRE_MODIFY)
22366	{
22367	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22368	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22369	    asm_fprintf (stream, "#%wd]!",
22370			 INTVAL (XEXP (XEXP (x, 1), 1)));
22371	  else
22372	    asm_fprintf (stream, "%r]!",
22373			 REGNO (XEXP (XEXP (x, 1), 1)));
22374	}
22375      else if (GET_CODE (x) == POST_MODIFY)
22376	{
22377	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22378	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22379	    asm_fprintf (stream, "#%wd",
22380			 INTVAL (XEXP (XEXP (x, 1), 1)));
22381	  else
22382	    asm_fprintf (stream, "%r",
22383			 REGNO (XEXP (XEXP (x, 1), 1)));
22384	}
22385      else output_addr_const (stream, x);
22386    }
22387  else
22388    {
22389      if (REG_P (x))
22390	asm_fprintf (stream, "[%r]", REGNO (x));
22391      else if (GET_CODE (x) == POST_INC)
22392	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22393      else if (GET_CODE (x) == PLUS)
22394	{
22395	  gcc_assert (REG_P (XEXP (x, 0)));
22396	  if (CONST_INT_P (XEXP (x, 1)))
22397	    asm_fprintf (stream, "[%r, #%wd]",
22398			 REGNO (XEXP (x, 0)),
22399			 INTVAL (XEXP (x, 1)));
22400	  else
22401	    asm_fprintf (stream, "[%r, %r]",
22402			 REGNO (XEXP (x, 0)),
22403			 REGNO (XEXP (x, 1)));
22404	}
22405      else
22406	output_addr_const (stream, x);
22407    }
22408}
22409
22410/* Target hook for indicating whether a punctuation character for
22411   TARGET_PRINT_OPERAND is valid.  */
22412static bool
22413arm_print_operand_punct_valid_p (unsigned char code)
22414{
22415  return (code == '@' || code == '|' || code == '.'
22416	  || code == '(' || code == ')' || code == '#'
22417	  || (TARGET_32BIT && (code == '?'))
22418	  || (TARGET_THUMB2 && (code == '!'))
22419	  || (TARGET_THUMB && (code == '_')));
22420}
22421
22422/* Target hook for assembling integer objects.  The ARM version needs to
22423   handle word-sized values specially.  */
22424static bool
22425arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22426{
22427  machine_mode mode;
22428
22429  if (size == UNITS_PER_WORD && aligned_p)
22430    {
22431      fputs ("\t.word\t", asm_out_file);
22432      output_addr_const (asm_out_file, x);
22433
22434      /* Mark symbols as position independent.  We only do this in the
22435	 .text segment, not in the .data segment.  */
22436      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22437	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22438	{
22439	  /* See legitimize_pic_address for an explanation of the
22440	     TARGET_VXWORKS_RTP check.  */
22441	  if (!arm_pic_data_is_text_relative
22442	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22443	    fputs ("(GOT)", asm_out_file);
22444	  else
22445	    fputs ("(GOTOFF)", asm_out_file);
22446	}
22447      fputc ('\n', asm_out_file);
22448      return true;
22449    }
22450
22451  mode = GET_MODE (x);
22452
22453  if (arm_vector_mode_supported_p (mode))
22454    {
22455      int i, units;
22456
22457      gcc_assert (GET_CODE (x) == CONST_VECTOR);
22458
22459      units = CONST_VECTOR_NUNITS (x);
22460      size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22461
22462      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22463        for (i = 0; i < units; i++)
22464	  {
22465	    rtx elt = CONST_VECTOR_ELT (x, i);
22466	    assemble_integer
22467	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22468	  }
22469      else
22470        for (i = 0; i < units; i++)
22471          {
22472            rtx elt = CONST_VECTOR_ELT (x, i);
22473            REAL_VALUE_TYPE rval;
22474
22475            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22476
22477            assemble_real
22478              (rval, GET_MODE_INNER (mode),
22479              i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22480          }
22481
22482      return true;
22483    }
22484
22485  return default_assemble_integer (x, size, aligned_p);
22486}
22487
22488static void
22489arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22490{
22491  section *s;
22492
22493  if (!TARGET_AAPCS_BASED)
22494    {
22495      (is_ctor ?
22496       default_named_section_asm_out_constructor
22497       : default_named_section_asm_out_destructor) (symbol, priority);
22498      return;
22499    }
22500
22501  /* Put these in the .init_array section, using a special relocation.  */
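  /* E.g. a constructor with priority 101 is placed in section
     ".init_array.00101".  */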
22502  if (priority != DEFAULT_INIT_PRIORITY)
22503    {
22504      char buf[18];
22505      sprintf (buf, "%s.%.5u",
22506	       is_ctor ? ".init_array" : ".fini_array",
22507	       priority);
22508      s = get_section (buf, SECTION_WRITE, NULL_TREE);
22509    }
22510  else if (is_ctor)
22511    s = ctors_section;
22512  else
22513    s = dtors_section;
22514
22515  switch_to_section (s);
22516  assemble_align (POINTER_SIZE);
22517  fputs ("\t.word\t", asm_out_file);
22518  output_addr_const (asm_out_file, symbol);
22519  fputs ("(target1)\n", asm_out_file);
22520}
22521
22522/* Add a function to the list of static constructors.  */
22523
22524static void
22525arm_elf_asm_constructor (rtx symbol, int priority)
22526{
22527  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22528}
22529
22530/* Add a function to the list of static destructors.  */
22531
22532static void
22533arm_elf_asm_destructor (rtx symbol, int priority)
22534{
22535  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22536}
22537
22538/* A finite state machine takes care of noticing whether or not instructions
22539   can be conditionally executed, and thus decrease execution time and code
22540   size by deleting branch instructions.  The fsm is controlled by
22541   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
22542
22543/* The states of the fsm controlling condition codes are:
22544   0: normal, do nothing special
22545   1: make ASM_OUTPUT_OPCODE not output this instruction
22546   2: make ASM_OUTPUT_OPCODE not output this instruction
22547   3: make instructions conditional
22548   4: make instructions conditional
22549
22550   State transitions (state->state by whom under condition):
22551   0 -> 1 final_prescan_insn if the `target' is a label
22552   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22553   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22554   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22555   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22556          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22557   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22558          (the target insn is arm_target_insn).
22559
22560   If the jump clobbers the conditions then we use states 2 and 4.
22561
22562   A similar thing can be done with conditional return insns.
22563
22564   XXX In case the `target' is an unconditional branch, this conditionalising
22565   of the instructions always reduces code size, but not always execution
22566   time.  But then, I want to reduce the code size to somewhere near what
22567   /bin/cc produces.  */
22568
22569/* In addition to this, state is maintained for Thumb-2 COND_EXEC
22570   instructions.  When a COND_EXEC instruction is seen the subsequent
22571   instructions are scanned so that multiple conditional instructions can be
22572   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
22573   specify the length and true/false mask for the IT block.  These will be
22574   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
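/* For example, three single conditionally executed instructions where the
   second uses the inverse condition yield arm_condexec_mask == 0x5 and
   arm_condexec_masklen == 3; thumb2_asm_output_opcode then emits an
   "itet <cond>" prefix before the first instruction of the block.  */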
22575
22576/* Returns the index of the ARM condition code string in
22577   `arm_condition_codes', or ARM_NV if the comparison is invalid.
22578   COMPARISON should be an rtx like `(eq (...) (...))'.  */
22579
22580enum arm_cond_code
22581maybe_get_arm_condition_code (rtx comparison)
22582{
22583  machine_mode mode = GET_MODE (XEXP (comparison, 0));
22584  enum arm_cond_code code;
22585  enum rtx_code comp_code = GET_CODE (comparison);
22586
22587  if (GET_MODE_CLASS (mode) != MODE_CC)
22588    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22589			   XEXP (comparison, 1));
22590
22591  switch (mode)
22592    {
22593    case CC_DNEmode: code = ARM_NE; goto dominance;
22594    case CC_DEQmode: code = ARM_EQ; goto dominance;
22595    case CC_DGEmode: code = ARM_GE; goto dominance;
22596    case CC_DGTmode: code = ARM_GT; goto dominance;
22597    case CC_DLEmode: code = ARM_LE; goto dominance;
22598    case CC_DLTmode: code = ARM_LT; goto dominance;
22599    case CC_DGEUmode: code = ARM_CS; goto dominance;
22600    case CC_DGTUmode: code = ARM_HI; goto dominance;
22601    case CC_DLEUmode: code = ARM_LS; goto dominance;
22602    case CC_DLTUmode: code = ARM_CC;
22603
22604    dominance:
22605      if (comp_code == EQ)
22606	return ARM_INVERSE_CONDITION_CODE (code);
22607      if (comp_code == NE)
22608	return code;
22609      return ARM_NV;
22610
22611    case CC_NOOVmode:
22612      switch (comp_code)
22613	{
22614	case NE: return ARM_NE;
22615	case EQ: return ARM_EQ;
22616	case GE: return ARM_PL;
22617	case LT: return ARM_MI;
22618	default: return ARM_NV;
22619	}
22620
22621    case CC_Zmode:
22622      switch (comp_code)
22623	{
22624	case NE: return ARM_NE;
22625	case EQ: return ARM_EQ;
22626	default: return ARM_NV;
22627	}
22628
22629    case CC_Nmode:
22630      switch (comp_code)
22631	{
22632	case NE: return ARM_MI;
22633	case EQ: return ARM_PL;
22634	default: return ARM_NV;
22635	}
22636
22637    case CCFPEmode:
22638    case CCFPmode:
22639      /* We can handle all cases except UNEQ and LTGT.  */
22640      switch (comp_code)
22641	{
22642	case GE: return ARM_GE;
22643	case GT: return ARM_GT;
22644	case LE: return ARM_LS;
22645	case LT: return ARM_MI;
22646	case NE: return ARM_NE;
22647	case EQ: return ARM_EQ;
22648	case ORDERED: return ARM_VC;
22649	case UNORDERED: return ARM_VS;
22650	case UNLT: return ARM_LT;
22651	case UNLE: return ARM_LE;
22652	case UNGT: return ARM_HI;
22653	case UNGE: return ARM_PL;
22654	  /* UNEQ and LTGT do not have a representation.  */
22655	case UNEQ: /* Fall through.  */
22656	case LTGT: /* Fall through.  */
22657	default: return ARM_NV;
22658	}
22659
22660    case CC_SWPmode:
22661      switch (comp_code)
22662	{
22663	case NE: return ARM_NE;
22664	case EQ: return ARM_EQ;
22665	case GE: return ARM_LE;
22666	case GT: return ARM_LT;
22667	case LE: return ARM_GE;
22668	case LT: return ARM_GT;
22669	case GEU: return ARM_LS;
22670	case GTU: return ARM_CC;
22671	case LEU: return ARM_CS;
22672	case LTU: return ARM_HI;
22673	default: return ARM_NV;
22674	}
22675
22676    case CC_Cmode:
22677      switch (comp_code)
22678	{
22679	case LTU: return ARM_CS;
22680	case GEU: return ARM_CC;
22681	default: return ARM_NV;
22682	}
22683
22684    case CC_CZmode:
22685      switch (comp_code)
22686	{
22687	case NE: return ARM_NE;
22688	case EQ: return ARM_EQ;
22689	case GEU: return ARM_CS;
22690	case GTU: return ARM_HI;
22691	case LEU: return ARM_LS;
22692	case LTU: return ARM_CC;
22693	default: return ARM_NV;
22694	}
22695
22696    case CC_NCVmode:
22697      switch (comp_code)
22698	{
22699	case GE: return ARM_GE;
22700	case LT: return ARM_LT;
22701	case GEU: return ARM_CS;
22702	case LTU: return ARM_CC;
22703	default: return ARM_NV;
22704	}
22705
22706    case CCmode:
22707      switch (comp_code)
22708	{
22709	case NE: return ARM_NE;
22710	case EQ: return ARM_EQ;
22711	case GE: return ARM_GE;
22712	case GT: return ARM_GT;
22713	case LE: return ARM_LE;
22714	case LT: return ARM_LT;
22715	case GEU: return ARM_CS;
22716	case GTU: return ARM_HI;
22717	case LEU: return ARM_LS;
22718	case LTU: return ARM_CC;
22719	default: return ARM_NV;
22720	}
22721
22722    default: gcc_unreachable ();
22723    }
22724}
22725
22726/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
22727static enum arm_cond_code
22728get_arm_condition_code (rtx comparison)
22729{
22730  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22731  gcc_assert (code != ARM_NV);
22732  return code;
22733}
22734
22735/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22736   instructions.  */
22737void
22738thumb2_final_prescan_insn (rtx_insn *insn)
22739{
22740  rtx_insn *first_insn = insn;
22741  rtx body = PATTERN (insn);
22742  rtx predicate;
22743  enum arm_cond_code code;
22744  int n;
22745  int mask;
22746  int max;
22747
22748  /* max_insns_skipped in the tune was already taken into account in the
22749     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
22750     just emit the IT blocks as we can.  It does not make sense to split
22751     just emit the IT blocks as best we can.  It does not make sense to split
22752  max = MAX_INSN_PER_IT_BLOCK;
22753
22754  /* Remove the previous insn from the count of insns to be output.  */
22755  if (arm_condexec_count)
22756      arm_condexec_count--;
22757
22758  /* Nothing to do if we are already inside a conditional block.  */
22759  if (arm_condexec_count)
22760    return;
22761
22762  if (GET_CODE (body) != COND_EXEC)
22763    return;
22764
22765  /* Conditional jumps are implemented directly.  */
22766  if (JUMP_P (insn))
22767    return;
22768
22769  predicate = COND_EXEC_TEST (body);
22770  arm_current_cc = get_arm_condition_code (predicate);
22771
22772  n = get_attr_ce_count (insn);
22773  arm_condexec_count = 1;
22774  arm_condexec_mask = (1 << n) - 1;
22775  arm_condexec_masklen = n;
22776  /* See if subsequent instructions can be combined into the same block.  */
22777  for (;;)
22778    {
22779      insn = next_nonnote_insn (insn);
22780
22781      /* Jumping into the middle of an IT block is illegal, so a label or
22782         barrier terminates the block.  */
22783      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22784	break;
22785
22786      body = PATTERN (insn);
22787      /* USE and CLOBBER aren't really insns, so just skip them.  */
22788      if (GET_CODE (body) == USE
22789	  || GET_CODE (body) == CLOBBER)
22790	continue;
22791
22792      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
22793      if (GET_CODE (body) != COND_EXEC)
22794	break;
22795      /* Maximum number of conditionally executed instructions in a block.  */
22796      n = get_attr_ce_count (insn);
22797      if (arm_condexec_masklen + n > max)
22798	break;
22799
22800      predicate = COND_EXEC_TEST (body);
22801      code = get_arm_condition_code (predicate);
22802      mask = (1 << n) - 1;
22803      if (arm_current_cc == code)
22804	arm_condexec_mask |= (mask << arm_condexec_masklen);
22805      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22806	break;
22807
22808      arm_condexec_count++;
22809      arm_condexec_masklen += n;
22810
22811      /* A jump must be the last instruction in a conditional block.  */
22812      if (JUMP_P (insn))
22813	break;
22814    }
22815  /* Restore recog_data (getting the attributes of other insns can
22816     destroy this array, but final.c assumes that it remains intact
22817     across this call).  */
22818  extract_constrain_insn_cached (first_insn);
22819}
22820
22821void
22822arm_final_prescan_insn (rtx_insn *insn)
22823{
22824  /* BODY will hold the body of INSN.  */
22825  rtx body = PATTERN (insn);
22826
22827  /* This will be 1 if trying to repeat the trick, and things need to be
22828     reversed if it appears to fail.  */
22829  int reverse = 0;
22830
22831  /* If we start with a return insn, we only succeed if we find another one.  */
22832  int seeking_return = 0;
22833  enum rtx_code return_code = UNKNOWN;
22834
22835  /* START_INSN will hold the insn from where we start looking.  This is the
22836     first insn after the following code_label if REVERSE is true.  */
22837  rtx_insn *start_insn = insn;
22838
22839  /* If in state 4, check if the target branch is reached, in order to
22840     change back to state 0.  */
22841  if (arm_ccfsm_state == 4)
22842    {
22843      if (insn == arm_target_insn)
22844	{
22845	  arm_target_insn = NULL;
22846	  arm_ccfsm_state = 0;
22847	}
22848      return;
22849    }
22850
22851  /* If in state 3, it is possible to repeat the trick, if this insn is an
22852     unconditional branch to a label, and immediately following this branch
22853     is the previous target label which is only used once, and the label this
22854     branch jumps to is not too far off.  */
22855  if (arm_ccfsm_state == 3)
22856    {
22857      if (simplejump_p (insn))
22858	{
22859	  start_insn = next_nonnote_insn (start_insn);
22860	  if (BARRIER_P (start_insn))
22861	    {
22862	      /* XXX Isn't this always a barrier?  */
22863	      start_insn = next_nonnote_insn (start_insn);
22864	    }
22865	  if (LABEL_P (start_insn)
22866	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22867	      && LABEL_NUSES (start_insn) == 1)
22868	    reverse = TRUE;
22869	  else
22870	    return;
22871	}
22872      else if (ANY_RETURN_P (body))
22873        {
22874	  start_insn = next_nonnote_insn (start_insn);
22875	  if (BARRIER_P (start_insn))
22876	    start_insn = next_nonnote_insn (start_insn);
22877	  if (LABEL_P (start_insn)
22878	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22879	      && LABEL_NUSES (start_insn) == 1)
22880	    {
22881	      reverse = TRUE;
22882	      seeking_return = 1;
22883	      return_code = GET_CODE (body);
22884	    }
22885	  else
22886	    return;
22887        }
22888      else
22889	return;
22890    }
22891
22892  gcc_assert (!arm_ccfsm_state || reverse);
22893  if (!JUMP_P (insn))
22894    return;
22895
22896  /* This jump might be paralleled with a clobber of the condition codes;
22897     the jump should always come first.  */
22898  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22899    body = XVECEXP (body, 0, 0);
22900
22901  if (reverse
22902      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22903	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22904    {
22905      int insns_skipped;
22906      int fail = FALSE, succeed = FALSE;
22907      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
22908      int then_not_else = TRUE;
22909      rtx_insn *this_insn = start_insn;
22910      rtx label = 0;
22911
22912      /* Register the insn jumped to.  */
22913      if (reverse)
22914        {
22915	  if (!seeking_return)
22916	    label = XEXP (SET_SRC (body), 0);
22917        }
22918      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22919	label = XEXP (XEXP (SET_SRC (body), 1), 0);
22920      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22921	{
22922	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
22923	  then_not_else = FALSE;
22924	}
22925      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22926	{
22927	  seeking_return = 1;
22928	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22929	}
22930      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22931        {
22932	  seeking_return = 1;
22933	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22934	  then_not_else = FALSE;
22935        }
22936      else
22937	gcc_unreachable ();
22938
22939      /* See how many insns this branch skips, and what kind of insns.  If all
22940	 insns are okay, and the label or unconditional branch to the same
22941	 label is not too far away, succeed.  */
22942      for (insns_skipped = 0;
22943	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22944	{
22945	  rtx scanbody;
22946
22947	  this_insn = next_nonnote_insn (this_insn);
22948	  if (!this_insn)
22949	    break;
22950
22951	  switch (GET_CODE (this_insn))
22952	    {
22953	    case CODE_LABEL:
22954	      /* Succeed if it is the target label, otherwise fail since
22955		 control falls in from somewhere else.  */
22956	      if (this_insn == label)
22957		{
22958		  arm_ccfsm_state = 1;
22959		  succeed = TRUE;
22960		}
22961	      else
22962		fail = TRUE;
22963	      break;
22964
22965	    case BARRIER:
22966	      /* Succeed if the following insn is the target label.
22967		 Otherwise fail.
22968		 If return insns are used then the last insn in a function
22969		 will be a barrier.  */
22970	      this_insn = next_nonnote_insn (this_insn);
22971	      if (this_insn && this_insn == label)
22972		{
22973		  arm_ccfsm_state = 1;
22974		  succeed = TRUE;
22975		}
22976	      else
22977		fail = TRUE;
22978	      break;
22979
22980	    case CALL_INSN:
22981	      /* The AAPCS says that conditional calls should not be
22982		 used since they make interworking inefficient (the
22983		 linker can't transform BL<cond> into BLX).  That's
22984		 only a problem if the machine has BLX.  */
22985	      if (arm_arch5)
22986		{
22987		  fail = TRUE;
22988		  break;
22989		}
22990
22991	      /* Succeed if the following insn is the target label, or
22992		 if the following two insns are a barrier and the
22993		 target label.  */
22994	      this_insn = next_nonnote_insn (this_insn);
22995	      if (this_insn && BARRIER_P (this_insn))
22996		this_insn = next_nonnote_insn (this_insn);
22997
22998	      if (this_insn && this_insn == label
22999		  && insns_skipped < max_insns_skipped)
23000		{
23001		  arm_ccfsm_state = 1;
23002		  succeed = TRUE;
23003		}
23004	      else
23005		fail = TRUE;
23006	      break;
23007
23008	    case JUMP_INSN:
23009	      /* If this is an unconditional branch to the same label, succeed.
23010		 If it is to another label, do nothing.  If it is conditional,
23011		 fail.  */
23012	      /* XXX Probably, the tests for SET and the PC are
23013		 unnecessary.  */
23014
23015	      scanbody = PATTERN (this_insn);
23016	      if (GET_CODE (scanbody) == SET
23017		  && GET_CODE (SET_DEST (scanbody)) == PC)
23018		{
23019		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23020		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23021		    {
23022		      arm_ccfsm_state = 2;
23023		      succeed = TRUE;
23024		    }
23025		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23026		    fail = TRUE;
23027		}
23028	      /* Fail if a conditional return is undesirable (e.g. on a
23029		 StrongARM), but still allow this if optimizing for size.  */
23030	      else if (GET_CODE (scanbody) == return_code
23031		       && !use_return_insn (TRUE, NULL)
23032		       && !optimize_size)
23033		fail = TRUE;
23034	      else if (GET_CODE (scanbody) == return_code)
23035	        {
23036		  arm_ccfsm_state = 2;
23037		  succeed = TRUE;
23038	        }
23039	      else if (GET_CODE (scanbody) == PARALLEL)
23040	        {
23041		  switch (get_attr_conds (this_insn))
23042		    {
23043		    case CONDS_NOCOND:
23044		      break;
23045		    default:
23046		      fail = TRUE;
23047		      break;
23048		    }
23049		}
23050	      else
23051		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
23052
23053	      break;
23054
23055	    case INSN:
23056	      /* Instructions using or affecting the condition codes make it
23057		 fail.  */
23058	      scanbody = PATTERN (this_insn);
23059	      if (!(GET_CODE (scanbody) == SET
23060		    || GET_CODE (scanbody) == PARALLEL)
23061		  || get_attr_conds (this_insn) != CONDS_NOCOND)
23062		fail = TRUE;
23063	      break;
23064
23065	    default:
23066	      break;
23067	    }
23068	}
23069      if (succeed)
23070	{
23071	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23072	    arm_target_label = CODE_LABEL_NUMBER (label);
23073	  else
23074	    {
23075	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
23076
23077	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23078	        {
23079		  this_insn = next_nonnote_insn (this_insn);
23080		  gcc_assert (!this_insn
23081			      || (!BARRIER_P (this_insn)
23082				  && !LABEL_P (this_insn)));
23083	        }
23084	      if (!this_insn)
23085	        {
23086		  /* Oh dear!  We ran off the end; give up.  */
23087		  extract_constrain_insn_cached (insn);
23088		  arm_ccfsm_state = 0;
23089		  arm_target_insn = NULL;
23090		  return;
23091	        }
23092	      arm_target_insn = this_insn;
23093	    }
23094
23095	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23096	     what it was.  */
23097	  if (!reverse)
23098	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23099
23100	  if (reverse || then_not_else)
23101	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23102	}
23103
23104      /* Restore recog_data (getting the attributes of other insns can
23105	 destroy this array, but final.c assumes that it remains intact
23106	 across this call).  */
23107      extract_constrain_insn_cached (insn);
23108    }
23109}
23110
23111/* Output IT instructions.  */
23112void
23113thumb2_asm_output_opcode (FILE * stream)
23114{
23115  char buff[5];
23116  int n;
23117
23118  if (arm_condexec_mask)
23119    {
23120      for (n = 0; n < arm_condexec_masklen; n++)
23121	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23122      buff[n] = 0;
23123      asm_fprintf(stream, "i%s\t%s\n\t", buff,
23124		  arm_condition_codes[arm_current_cc]);
23125      arm_condexec_mask = 0;
23126    }
23127}
23128
23129/* Returns true if REGNO is a valid register
23130   for holding a quantity of type MODE.  */
23131int
23132arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23133{
23134  if (GET_MODE_CLASS (mode) == MODE_CC)
23135    return (regno == CC_REGNUM
23136	    || (TARGET_HARD_FLOAT && TARGET_VFP
23137		&& regno == VFPCC_REGNUM));
23138
23139  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23140    return false;
23141
23142  if (TARGET_THUMB1)
23143    /* For the Thumb we only allow values bigger than SImode in
23144       registers 0 - 6, so that there is always a second low
23145       register available to hold the upper part of the value.
23146       We probably ought to ensure that the register is the
23147       start of an even-numbered register pair.  */
23148    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23149
23150  if (TARGET_HARD_FLOAT && TARGET_VFP
23151      && IS_VFP_REGNUM (regno))
23152    {
23153      if (mode == SFmode || mode == SImode)
23154	return VFP_REGNO_OK_FOR_SINGLE (regno);
23155
23156      if (mode == DFmode)
23157	return VFP_REGNO_OK_FOR_DOUBLE (regno);
23158
23159      /* VFP registers can hold HFmode values, but there is no point in
23160	 putting them there unless we have hardware conversion insns. */
23161      if (mode == HFmode)
23162	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23163
23164      if (TARGET_NEON)
23165        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23166               || (VALID_NEON_QREG_MODE (mode)
23167                   && NEON_REGNO_OK_FOR_QUAD (regno))
23168	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23169	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23170	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23171	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23172	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23173
23174      return FALSE;
23175    }
23176
23177  if (TARGET_REALLY_IWMMXT)
23178    {
23179      if (IS_IWMMXT_GR_REGNUM (regno))
23180	return mode == SImode;
23181
23182      if (IS_IWMMXT_REGNUM (regno))
23183	return VALID_IWMMXT_REG_MODE (mode);
23184    }
23185
23186  /* We allow almost any value to be stored in the general registers.
23187     Restrict doubleword quantities to even register pairs in ARM state
23188     so that we can use ldrd.  Do not allow very large Neon structure
23189     opaque modes in general registers; they would use too many.  */
23190  if (regno <= LAST_ARM_REGNUM)
23191    {
23192      if (ARM_NUM_REGS (mode) > 4)
23193	  return FALSE;
23194
23195      if (TARGET_THUMB2)
23196	return TRUE;
23197
23198      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23199    }
23200
23201  if (regno == FRAME_POINTER_REGNUM
23202      || regno == ARG_POINTER_REGNUM)
23203    /* We only allow integers in the fake hard registers.  */
23204    return GET_MODE_CLASS (mode) == MODE_INT;
23205
23206  return FALSE;
23207}
23208
23209/* Implement MODES_TIEABLE_P.  */
23210
23211bool
23212arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23213{
23214  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23215    return true;
23216
23217  /* We specifically want to allow elements of "structure" modes to
23218     be tieable to the structure.  This more general condition allows
23219     other rarer situations too.  */
23220  if (TARGET_NEON
23221      && (VALID_NEON_DREG_MODE (mode1)
23222	  || VALID_NEON_QREG_MODE (mode1)
23223	  || VALID_NEON_STRUCT_MODE (mode1))
23224      && (VALID_NEON_DREG_MODE (mode2)
23225	  || VALID_NEON_QREG_MODE (mode2)
23226	  || VALID_NEON_STRUCT_MODE (mode2)))
23227    return true;
23228
23229  return false;
23230}
23231
23232/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23233   not used in ARM mode.  */
23234
23235enum reg_class
23236arm_regno_class (int regno)
23237{
23238  if (regno == PC_REGNUM)
23239    return NO_REGS;
23240
23241  if (TARGET_THUMB1)
23242    {
23243      if (regno == STACK_POINTER_REGNUM)
23244	return STACK_REG;
23245      if (regno == CC_REGNUM)
23246	return CC_REG;
23247      if (regno < 8)
23248	return LO_REGS;
23249      return HI_REGS;
23250    }
23251
23252  if (TARGET_THUMB2 && regno < 8)
23253    return LO_REGS;
23254
23255  if (   regno <= LAST_ARM_REGNUM
23256      || regno == FRAME_POINTER_REGNUM
23257      || regno == ARG_POINTER_REGNUM)
23258    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23259
23260  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23261    return TARGET_THUMB2 ? CC_REG : NO_REGS;
23262
23263  if (IS_VFP_REGNUM (regno))
23264    {
23265      if (regno <= D7_VFP_REGNUM)
23266	return VFP_D0_D7_REGS;
23267      else if (regno <= LAST_LO_VFP_REGNUM)
23268        return VFP_LO_REGS;
23269      else
23270        return VFP_HI_REGS;
23271    }
23272
23273  if (IS_IWMMXT_REGNUM (regno))
23274    return IWMMXT_REGS;
23275
23276  if (IS_IWMMXT_GR_REGNUM (regno))
23277    return IWMMXT_GR_REGS;
23278
23279  return NO_REGS;
23280}
23281
23282/* Handle a special case when computing the offset
23283   of an argument from the frame pointer.  */
23284int
23285arm_debugger_arg_offset (int value, rtx addr)
23286{
23287  rtx_insn *insn;
23288
23289  /* We are only interested if dbxout_parms() failed to compute the offset.  */
23290  if (value != 0)
23291    return 0;
23292
23293  /* We can only cope with the case where the address is held in a register.  */
23294  if (!REG_P (addr))
23295    return 0;
23296
23297  /* If we are using the frame pointer to point at the argument, then
23298     an offset of 0 is correct.  */
23299  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23300    return 0;
23301
23302  /* If we are using the stack pointer to point at the
23303     argument, then an offset of 0 is correct.  */
23304  /* ??? Check this is consistent with thumb2 frame layout.  */
23305  if ((TARGET_THUMB || !frame_pointer_needed)
23306      && REGNO (addr) == SP_REGNUM)
23307    return 0;
23308
23309  /* Oh dear.  The argument is pointed to by a register rather
23310     than being held in a register, or being stored at a known
23311     offset from the frame pointer.  Since GDB only understands
23312     those two kinds of argument we must translate the address
23313     held in the register into an offset from the frame pointer.
23314     We do this by searching through the insns for the function
23315     looking to see where this register gets its value.  If the
23316     register is initialized from the frame pointer plus an offset
23317     then we are in luck and we can continue, otherwise we give up.
23318
23319     This code is exercised by producing debugging information
23320     for a function with arguments like this:
23321
23322           double func (double a, double b, int c, double d) {return d;}
23323
23324     Without this code the stab for parameter 'd' will be set to
23325     an offset of 0 from the frame pointer, rather than 8.  */
23326
23327  /* The if() statement says:
23328
23329     If the insn is a normal instruction
23330     and if the insn is setting the value in a register
23331     and if the register being set is the register holding the address of the argument
23332     and if the address is computed by an addition
23333     that involves adding to a register
23334     which is the frame pointer
23335     a constant integer
23336
23337     then...  */
23338
23339  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23340    {
23341      if (   NONJUMP_INSN_P (insn)
23342	  && GET_CODE (PATTERN (insn)) == SET
23343	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23344	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23345	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23346	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23347	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23348	     )
23349	{
23350	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23351
23352	  break;
23353	}
23354    }
23355
23356  if (value == 0)
23357    {
23358      debug_rtx (addr);
23359      warning (0, "unable to compute real location of stacked parameter");
23360      value = 8; /* XXX magic hack */
23361    }
23362
23363  return value;
23364}
23365
23366/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
23367
23368static const char *
23369arm_invalid_parameter_type (const_tree t)
23370{
23371  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23372    return N_("function parameters cannot have __fp16 type");
23373  return NULL;
23374}
23375
23376/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
23377
23378static const char *
23379arm_invalid_return_type (const_tree t)
23380{
23381  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23382    return N_("functions cannot return __fp16 type");
23383  return NULL;
23384}
23385
23386/* Implement TARGET_PROMOTED_TYPE.  */
23387
23388static tree
23389arm_promoted_type (const_tree t)
23390{
23391  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23392    return float_type_node;
23393  return NULL_TREE;
23394}
23395
23396/* Implement TARGET_CONVERT_TO_TYPE.
23397   Specifically, this hook implements the peculiarity of the ARM
23398   half-precision floating-point C semantics that requires conversions between
23399   __fp16 to or from double to do an intermediate conversion to float.  */
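/* For example, a conversion from __fp16 to double is carried out as
   __fp16 -> float -> double, and similarly for the reverse direction.  */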
23400
23401static tree
23402arm_convert_to_type (tree type, tree expr)
23403{
23404  tree fromtype = TREE_TYPE (expr);
23405  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23406    return NULL_TREE;
23407  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23408      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23409    return convert (type, convert (float_type_node, expr));
23410  return NULL_TREE;
23411}
23412
23413/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23414   This simply adds HFmode as a supported mode; even though we don't
23415   implement arithmetic on this type directly, it's supported by
23416   optabs conversions, much the way the double-word arithmetic is
23417   special-cased in the default hook.  */
23418
23419static bool
23420arm_scalar_mode_supported_p (machine_mode mode)
23421{
23422  if (mode == HFmode)
23423    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23424  else if (ALL_FIXED_POINT_MODE_P (mode))
23425    return true;
23426  else
23427    return default_scalar_mode_supported_p (mode);
23428}
23429
23430/* Emit code to reinterpret one Neon type as another, without altering bits.  */
23431void
23432neon_reinterpret (rtx dest, rtx src)
23433{
23434  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23435}
23436
23437/* Set up OPERANDS for a register copy from SRC to DEST, taking care
23438   not to early-clobber SRC registers in the process.
23439
23440   We assume that the operands described by SRC and DEST represent a
23441   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
23442   number of components into which the copy has been decomposed.  */
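/* For example, when the destination registers overlap the source and start
   at a higher register number, the component moves are emitted in reverse
   order so that no source register is clobbered before it has been read.  */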
23443void
23444neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23445{
23446  unsigned int i;
23447
23448  if (!reg_overlap_mentioned_p (operands[0], operands[1])
23449      || REGNO (operands[0]) < REGNO (operands[1]))
23450    {
23451      for (i = 0; i < count; i++)
23452	{
23453	  operands[2 * i] = dest[i];
23454	  operands[2 * i + 1] = src[i];
23455	}
23456    }
23457  else
23458    {
23459      for (i = 0; i < count; i++)
23460	{
23461	  operands[2 * i] = dest[count - i - 1];
23462	  operands[2 * i + 1] = src[count - i - 1];
23463	}
23464    }
23465}
23466
23467/* Split operands into moves from op[1] + op[2] into op[0].  */
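/* The low half of operands[0] receives operands[1] and the high half
   receives operands[2].  */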
23468
23469void
23470neon_split_vcombine (rtx operands[3])
23471{
23472  unsigned int dest = REGNO (operands[0]);
23473  unsigned int src1 = REGNO (operands[1]);
23474  unsigned int src2 = REGNO (operands[2]);
23475  machine_mode halfmode = GET_MODE (operands[1]);
23476  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23477  rtx destlo, desthi;
23478
23479  if (src1 == dest && src2 == dest + halfregs)
23480    {
23481      /* No-op move.  Can't split to nothing; emit something.  */
23482      emit_note (NOTE_INSN_DELETED);
23483      return;
23484    }
23485
23486  /* Preserve register attributes for variable tracking.  */
23487  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23488  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23489			       GET_MODE_SIZE (halfmode));
23490
23491  /* Special case of reversed high/low parts.  Use VSWP.  */
23492  if (src2 == dest && src1 == dest + halfregs)
23493    {
23494      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23495      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23496      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23497      return;
23498    }
23499
23500  if (!reg_overlap_mentioned_p (operands[2], destlo))
23501    {
23502      /* Try to avoid unnecessary moves if part of the result
23503	 is in the right place already.  */
23504      if (src1 != dest)
23505	emit_move_insn (destlo, operands[1]);
23506      if (src2 != dest + halfregs)
23507	emit_move_insn (desthi, operands[2]);
23508    }
23509  else
23510    {
23511      if (src2 != dest + halfregs)
23512	emit_move_insn (desthi, operands[2]);
23513      if (src1 != dest)
23514	emit_move_insn (destlo, operands[1]);
23515    }
23516}
23517
23518/* Return the number (counting from 0) of
23519   the least significant set bit in MASK.  */
23520
23521inline static int
23522number_of_first_bit_set (unsigned mask)
23523{
23524  return ctz_hwi (mask);
23525}
23526
23527/* Like emit_multi_reg_push, but allowing for a different set of
23528   registers to be described as saved.  MASK is the set of registers
23529   to be saved; REAL_REGS is the set of registers to be described as
23530   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
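/* This is used, for instance, when high registers have first been copied
   into low registers before the push: MASK then names the low registers
   actually stored while REAL_REGS names the registers that the unwind
   information should describe as saved.  */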
23531
23532static rtx_insn *
23533thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23534{
23535  unsigned long regno;
23536  rtx par[10], tmp, reg;
23537  rtx_insn *insn;
23538  int i, j;
23539
23540  /* Build the parallel of the registers actually being stored.  */
23541  for (i = 0; mask; ++i, mask &= mask - 1)
23542    {
23543      regno = ctz_hwi (mask);
23544      reg = gen_rtx_REG (SImode, regno);
23545
23546      if (i == 0)
23547	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23548      else
23549	tmp = gen_rtx_USE (VOIDmode, reg);
23550
23551      par[i] = tmp;
23552    }
23553
23554  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23555  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23556  tmp = gen_frame_mem (BLKmode, tmp);
23557  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23558  par[0] = tmp;
23559
23560  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23561  insn = emit_insn (tmp);
23562
23563  /* Always build the stack adjustment note for unwind info.  */
23564  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23565  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23566  par[0] = tmp;
23567
23568  /* Build the parallel of the registers recorded as saved for unwind.  */
23569  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23570    {
23571      regno = ctz_hwi (real_regs);
23572      reg = gen_rtx_REG (SImode, regno);
23573
23574      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23575      tmp = gen_frame_mem (SImode, tmp);
23576      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23577      RTX_FRAME_RELATED_P (tmp) = 1;
23578      par[j + 1] = tmp;
23579    }
23580
23581  if (j == 0)
23582    tmp = par[0];
23583  else
23584    {
23585      RTX_FRAME_RELATED_P (par[0]) = 1;
23586      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23587    }
23588
23589  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23590
23591  return insn;
23592}
23593
/* Emit code to pop registers from the stack.  F is the assembly file.
   MASK is the set of registers to pop.  */
23596static void
23597thumb_pop (FILE *f, unsigned long mask)
23598{
23599  int regno;
23600  int lo_mask = mask & 0xFF;
23601  int pushed_words = 0;
23602
23603  gcc_assert (mask);
23604
23605  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23606    {
      /* Special case.  Do not generate a POP PC statement here; do it in
	 thumb_exit ().  */
23609      thumb_exit (f, -1);
23610      return;
23611    }
23612
23613  fprintf (f, "\tpop\t{");
23614
23615  /* Look at the low registers first.  */
23616  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23617    {
23618      if (lo_mask & 1)
23619	{
23620	  asm_fprintf (f, "%r", regno);
23621
23622	  if ((lo_mask & ~1) != 0)
23623	    fprintf (f, ", ");
23624
23625	  pushed_words++;
23626	}
23627    }
23628
23629  if (mask & (1 << PC_REGNUM))
23630    {
23631      /* Catch popping the PC.  */
23632      if (TARGET_INTERWORK || TARGET_BACKTRACE
23633	  || crtl->calls_eh_return)
23634	{
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
23637	  fprintf (f, "}\n");
23638
23639	  thumb_exit (f, -1);
23640
23641	  return;
23642	}
23643      else
23644	{
23645	  if (mask & 0xFF)
23646	    fprintf (f, ", ");
23647
23648	  asm_fprintf (f, "%r", PC_REGNUM);
23649	}
23650    }
23651
23652  fprintf (f, "}\n");
23653}
23654
23655/* Generate code to return from a thumb function.
23656   If 'reg_containing_return_addr' is -1, then the return address is
23657   actually on the stack, at the stack pointer.  */
23658static void
23659thumb_exit (FILE *f, int reg_containing_return_addr)
23660{
23661  unsigned regs_available_for_popping;
23662  unsigned regs_to_pop;
23663  int pops_needed;
23664  unsigned available;
23665  unsigned required;
23666  machine_mode mode;
23667  int size;
23668  int restore_a4 = FALSE;
23669
23670  /* Compute the registers we need to pop.  */
23671  regs_to_pop = 0;
23672  pops_needed = 0;
23673
23674  if (reg_containing_return_addr == -1)
23675    {
23676      regs_to_pop |= 1 << LR_REGNUM;
23677      ++pops_needed;
23678    }
23679
23680  if (TARGET_BACKTRACE)
23681    {
23682      /* Restore the (ARM) frame pointer and stack pointer.  */
23683      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23684      pops_needed += 2;
23685    }
23686
23687  /* If there is nothing to pop then just emit the BX instruction and
23688     return.  */
23689  if (pops_needed == 0)
23690    {
23691      if (crtl->calls_eh_return)
23692	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23693
23694      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23695      return;
23696    }
23697  /* Otherwise if we are not supporting interworking and we have not created
23698     a backtrace structure and the function was not entered in ARM mode then
23699     just pop the return address straight into the PC.  */
23700  else if (!TARGET_INTERWORK
23701	   && !TARGET_BACKTRACE
23702	   && !is_called_in_ARM_mode (current_function_decl)
23703	   && !crtl->calls_eh_return)
23704    {
23705      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23706      return;
23707    }
23708
23709  /* Find out how many of the (return) argument registers we can corrupt.  */
23710  regs_available_for_popping = 0;
23711
23712  /* If returning via __builtin_eh_return, the bottom three registers
23713     all contain information needed for the return.  */
23714  if (crtl->calls_eh_return)
23715    size = 12;
23716  else
23717    {
      /* Deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
23723
23724      if (crtl->return_rtx != 0)
23725	mode = GET_MODE (crtl->return_rtx);
23726      else
23727	mode = DECL_MODE (DECL_RESULT (current_function_decl));
23728
23729      size = GET_MODE_SIZE (mode);
23730
23731      if (size == 0)
23732	{
23733	  /* In a void function we can use any argument register.
23734	     In a function that returns a structure on the stack
23735	     we can use the second and third argument registers.  */
23736	  if (mode == VOIDmode)
23737	    regs_available_for_popping =
23738	      (1 << ARG_REGISTER (1))
23739	      | (1 << ARG_REGISTER (2))
23740	      | (1 << ARG_REGISTER (3));
23741	  else
23742	    regs_available_for_popping =
23743	      (1 << ARG_REGISTER (2))
23744	      | (1 << ARG_REGISTER (3));
23745	}
23746      else if (size <= 4)
23747	regs_available_for_popping =
23748	  (1 << ARG_REGISTER (2))
23749	  | (1 << ARG_REGISTER (3));
23750      else if (size <= 8)
23751	regs_available_for_popping =
23752	  (1 << ARG_REGISTER (3));
23753    }
23754
23755  /* Match registers to be popped with registers into which we pop them.  */
23756  for (available = regs_available_for_popping,
23757       required  = regs_to_pop;
23758       required != 0 && available != 0;
23759       available &= ~(available & - available),
23760       required  &= ~(required  & - required))
    --pops_needed;
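  /* For example, if LR, FP and SP all need popping (three pops) but only
     two argument registers are available, the loop above pairs off two of
     them and leaves pops_needed at 1.  */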
23762
23763  /* If we have any popping registers left over, remove them.  */
23764  if (available > 0)
23765    regs_available_for_popping &= ~available;
23766
23767  /* Otherwise if we need another popping register we can use
23768     the fourth argument register.  */
23769  else if (pops_needed)
23770    {
23771      /* If we have not found any free argument registers and
23772	 reg a4 contains the return address, we must move it.  */
23773      if (regs_available_for_popping == 0
23774	  && reg_containing_return_addr == LAST_ARG_REGNUM)
23775	{
23776	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23777	  reg_containing_return_addr = LR_REGNUM;
23778	}
23779      else if (size > 12)
23780	{
23781	  /* Register a4 is being used to hold part of the return value,
23782	     but we have dire need of a free, low register.  */
23783	  restore_a4 = TRUE;
23784
	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23786	}
23787
23788      if (reg_containing_return_addr != LAST_ARG_REGNUM)
23789	{
23790	  /* The fourth argument register is available.  */
23791	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23792
23793	  --pops_needed;
23794	}
23795    }
23796
23797  /* Pop as many registers as we can.  */
23798  thumb_pop (f, regs_available_for_popping);
23799
23800  /* Process the registers we popped.  */
23801  if (reg_containing_return_addr == -1)
23802    {
23803      /* The return address was popped into the lowest numbered register.  */
23804      regs_to_pop &= ~(1 << LR_REGNUM);
23805
23806      reg_containing_return_addr =
23807	number_of_first_bit_set (regs_available_for_popping);
23808
      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
23811      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23812    }
23813
23814  /* If we popped other registers then handle them here.  */
23815  if (regs_available_for_popping)
23816    {
23817      int frame_pointer;
23818
23819      /* Work out which register currently contains the frame pointer.  */
23820      frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23821
23822      /* Move it into the correct place.  */
23823      asm_fprintf (f, "\tmov\t%r, %r\n",
23824		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23825
23826      /* (Temporarily) remove it from the mask of popped registers.  */
23827      regs_available_for_popping &= ~(1 << frame_pointer);
23828      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23829
23830      if (regs_available_for_popping)
23831	{
23832	  int stack_pointer;
23833
23834	  /* We popped the stack pointer as well,
23835	     find the register that contains it.  */
23836	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23837
23838	  /* Move it into the stack register.  */
23839	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23840
23841	  /* At this point we have popped all necessary registers, so
23842	     do not worry about restoring regs_available_for_popping
23843	     to its correct value:
23844
23845	     assert (pops_needed == 0)
23846	     assert (regs_available_for_popping == (1 << frame_pointer))
23847	     assert (regs_to_pop == (1 << STACK_POINTER))  */
23848	}
23849      else
23850	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
23854	  regs_available_for_popping |= (1 << frame_pointer);
23855	}
23856    }
23857
23858  /* If we still have registers left on the stack, but we no longer have
23859     any registers into which we can pop them, then we must move the return
23860     address into the link register and make available the register that
23861     contained it.  */
23862  if (regs_available_for_popping == 0 && pops_needed > 0)
23863    {
23864      regs_available_for_popping |= 1 << reg_containing_return_addr;
23865
23866      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23867		   reg_containing_return_addr);
23868
23869      reg_containing_return_addr = LR_REGNUM;
23870    }
23871
23872  /* If we have registers left on the stack then pop some more.
23873     We know that at most we will want to pop FP and SP.  */
23874  if (pops_needed > 0)
23875    {
23876      int  popped_into;
23877      int  move_to;
23878
23879      thumb_pop (f, regs_available_for_popping);
23880
23881      /* We have popped either FP or SP.
23882	 Move whichever one it is into the correct register.  */
23883      popped_into = number_of_first_bit_set (regs_available_for_popping);
23884      move_to     = number_of_first_bit_set (regs_to_pop);
23885
23886      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23887
23888      regs_to_pop &= ~(1 << move_to);
23889
23890      --pops_needed;
23891    }
23892
23893  /* If we still have not popped everything then we must have only
23894     had one register available to us and we are now popping the SP.  */
23895  if (pops_needed > 0)
23896    {
23897      int  popped_into;
23898
23899      thumb_pop (f, regs_available_for_popping);
23900
23901      popped_into = number_of_first_bit_set (regs_available_for_popping);
23902
23903      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23904      /*
23905	assert (regs_to_pop == (1 << STACK_POINTER))
23906	assert (pops_needed == 1)
23907      */
23908    }
23909
23910  /* If necessary restore the a4 register.  */
23911  if (restore_a4)
23912    {
23913      if (reg_containing_return_addr != LR_REGNUM)
23914	{
23915	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23916	  reg_containing_return_addr = LR_REGNUM;
23917	}
23918
23919      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23920    }
23921
23922  if (crtl->calls_eh_return)
23923    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23924
23925  /* Return to caller.  */
23926  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23927}
23928
23929/* Scan INSN just before assembler is output for it.
23930   For Thumb-1, we track the status of the condition codes; this
23931   information is used in the cbranchsi4_insn pattern.  */
23932void
23933thumb1_final_prescan_insn (rtx_insn *insn)
23934{
23935  if (flag_print_asm_name)
23936    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23937		 INSN_ADDRESSES (INSN_UID (insn)));
23938  /* Don't overwrite the previous setter when we get to a cbranch.  */
23939  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23940    {
23941      enum attr_conds conds;
23942
23943      if (cfun->machine->thumb1_cc_insn)
23944	{
23945	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23946	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23947	    CC_STATUS_INIT;
23948	}
23949      conds = get_attr_conds (insn);
23950      if (conds == CONDS_SET)
23951	{
23952	  rtx set = single_set (insn);
23953	  cfun->machine->thumb1_cc_insn = insn;
23954	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23955	  cfun->machine->thumb1_cc_op1 = const0_rtx;
23956	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23957	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23958	    {
23959	      rtx src1 = XEXP (SET_SRC (set), 1);
23960	      if (src1 == const0_rtx)
23961		cfun->machine->thumb1_cc_mode = CCmode;
23962	    }
23963	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23964	    {
23965	      /* Record the src register operand instead of dest because
23966		 cprop_hardreg pass propagates src.  */
23967	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23968	    }
23969	}
23970      else if (conds != CONDS_NOCOND)
23971	cfun->machine->thumb1_cc_insn = NULL_RTX;
23972    }
23973
  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
23978}
23979
23980int
23981thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23982{
23983  unsigned HOST_WIDE_INT mask = 0xff;
23984  int i;
23985
23986  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23987  if (val == 0) /* XXX */
23988    return 0;
23989
23990  for (i = 0; i < 25; i++)
23991    if ((val & (mask << i)) == val)
23992      return 1;
23993
23994  return 0;
23995}
23996
23997/* Returns nonzero if the current function contains,
23998   or might contain a far jump.  */
23999static int
24000thumb_far_jump_used_p (void)
24001{
24002  rtx_insn *insn;
24003  bool far_jump = false;
24004  unsigned int func_size = 0;
24005
24006  /* This test is only important for leaf functions.  */
24007  /* assert (!leaf_function_p ()); */
24008
24009  /* If we have already decided that far jumps may be used,
24010     do not bother checking again, and always return true even if
24011     it turns out that they are not being used.  Once we have made
24012     the decision that far jumps are present (and that hence the link
24013     register will be pushed onto the stack) we cannot go back on it.  */
24014  if (cfun->machine->far_jump_used)
24015    return 1;
24016
24017  /* If this function is not being called from the prologue/epilogue
24018     generation code then it must be being called from the
24019     INITIAL_ELIMINATION_OFFSET macro.  */
24020  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24021    {
24022      /* In this case we know that we are being asked about the elimination
24023	 of the arg pointer register.  If that register is not being used,
24024	 then there are no arguments on the stack, and we do not have to
24025	 worry that a far jump might force the prologue to push the link
24026	 register, changing the stack offsets.  In this case we can just
24027	 return false, since the presence of far jumps in the function will
24028	 not affect stack offsets.
24029
24030	 If the arg pointer is live (or if it was live, but has now been
24031	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.
24036
24037	 A false negative will not result in bad code being generated, but it
24038	 will result in a needless push and pop of the link register.  We
24039	 hope that this does not occur too often.
24040
24041	 If we need doubleword stack alignment this could affect the other
24042	 elimination offsets so we can't risk getting it wrong.  */
24043      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24044	cfun->machine->arg_pointer_live = 1;
24045      else if (!cfun->machine->arg_pointer_live)
24046	return 0;
24047    }
24048
24049  /* We should not change far_jump_used during or after reload, as there is
24050     no chance to change stack frame layout.  */
24051  if (reload_in_progress || reload_completed)
24052    return 0;
24053
24054  /* Check to see if the function contains a branch
24055     insn with the far jump attribute set.  */
24056  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24057    {
24058      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24059	{
24060	  far_jump = true;
24061	}
24062      func_size += get_attr_length (insn);
24063    }
24064
  /* The far_jump attribute will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a
     far jump will eventually be needed.  The heuristic is very conservative,
     as there is no chance to roll back the decision not to use a far jump.

     The Thumb1 long branch offset range is -2048 to 2046.  The worst case is
     that each 2-byte insn is associated with a 4-byte constant pool entry.
     Using a function size of 2048/3 as the threshold is therefore
     conservative enough.  */
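  /* In other words, each 2-byte insn can in the worst case be accompanied
     by a 4-byte constant pool entry, so the final layout can approach
     3 * func_size bytes; the test below fires once that worst case can
     reach the 2048-byte branch range.  */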
24076  if (far_jump)
24077    {
24078      if ((func_size * 3) >= 2048)
24079        {
24080	  /* Record the fact that we have decided that
24081	     the function does use far jumps.  */
24082	  cfun->machine->far_jump_used = 1;
24083	  return 1;
24084	}
24085    }
24086
24087  return 0;
24088}
24089
24090/* Return nonzero if FUNC must be entered in ARM mode.  */
24091int
24092is_called_in_ARM_mode (tree func)
24093{
24094  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24095
  /* Ignore the problem of functions whose address is taken.  */
24097  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24098    return TRUE;
24099
24100#ifdef ARM_PE
24101  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24102#else
24103  return FALSE;
24104#endif
24105}
24106
24107/* Given the stack offsets and register mask in OFFSETS, decide how
24108   many additional registers to push instead of subtracting a constant
24109   from SP.  For epilogues the principle is the same except we use pop.
24110   FOR_PROLOGUE indicates which we're generating.  */
24111static int
24112thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24113{
24114  HOST_WIDE_INT amount;
24115  unsigned long live_regs_mask = offsets->saved_regs_mask;
24116  /* Extract a mask of the ones we can give to the Thumb's push/pop
24117     instruction.  */
24118  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24119  /* Then count how many other high registers will need to be pushed.  */
24120  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24121  int n_free, reg_base, size;
24122
24123  if (!for_prologue && frame_pointer_needed)
24124    amount = offsets->locals_base - offsets->saved_regs;
24125  else
24126    amount = offsets->outgoing_args - offsets->saved_regs;
24127
24128  /* If the stack frame size is 512 exactly, we can save one load
24129     instruction, which should make this a win even when optimizing
24130     for speed.  */
24131  if (!optimize_size && amount != 512)
24132    return 0;
24133
24134  /* Can't do this if there are high registers to push.  */
24135  if (high_regs_pushed != 0)
24136    return 0;
24137
24138  /* Shouldn't do it in the prologue if no registers would normally
24139     be pushed at all.  In the epilogue, also allow it if we'll have
24140     a pop insn for the PC.  */
24141  if  (l_mask == 0
24142       && (for_prologue
24143	   || TARGET_BACKTRACE
24144	   || (live_regs_mask & 1 << LR_REGNUM) == 0
24145	   || TARGET_INTERWORK
24146	   || crtl->args.pretend_args_size != 0))
24147    return 0;
24148
24149  /* Don't do this if thumb_expand_prologue wants to emit instructions
24150     between the push and the stack frame allocation.  */
24151  if (for_prologue
24152      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24153	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24154    return 0;
24155
24156  reg_base = 0;
24157  n_free = 0;
24158  if (!for_prologue)
24159    {
24160      size = arm_size_return_regs ();
24161      reg_base = ARM_NUM_INTS (size);
24162      live_regs_mask >>= reg_base;
24163    }
24164
24165  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24166	 && (for_prologue || call_used_regs[reg_base + n_free]))
24167    {
24168      live_regs_mask >>= 1;
24169      n_free++;
24170    }
24171
24172  if (n_free == 0)
24173    return 0;
24174  gcc_assert (amount / 4 * 4 == amount);
24175
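  /* For example, with amount == 516 and three free low registers, pushing
     (516 - 508) / 4 == 2 extra registers leaves a 508-byte adjustment,
     which still fits in a single Thumb-1 SP-adjusting instruction.  */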
24176  if (amount >= 512 && (amount - n_free * 4) < 512)
24177    return (amount - 508) / 4;
24178  if (amount <= n_free * 4)
24179    return amount / 4;
24180  return 0;
24181}
24182
24183/* The bits which aren't usefully expanded as rtl.  */
24184const char *
24185thumb1_unexpanded_epilogue (void)
24186{
24187  arm_stack_offsets *offsets;
24188  int regno;
24189  unsigned long live_regs_mask = 0;
24190  int high_regs_pushed = 0;
24191  int extra_pop;
24192  int had_to_push_lr;
24193  int size;
24194
24195  if (cfun->machine->return_used_this_function != 0)
24196    return "";
24197
24198  if (IS_NAKED (arm_current_func_type ()))
24199    return "";
24200
24201  offsets = arm_get_frame_offsets ();
24202  live_regs_mask = offsets->saved_regs_mask;
24203  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24204
  /* Deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
24209  size = arm_size_return_regs ();
24210
24211  extra_pop = thumb1_extra_regs_pushed (offsets, false);
24212  if (extra_pop > 0)
24213    {
24214      unsigned long extra_mask = (1 << extra_pop) - 1;
24215      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24216    }
24217
  /* The prologue may have pushed some high registers to use as
     work registers; e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prologue.  We have to undo that pushing here.  */
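  /* The undo emitted below pops into whatever low registers are free and
     then moves the values back up, roughly:
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
     before the remaining low registers and the return address are
     popped.  */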
24227
24228  if (high_regs_pushed)
24229    {
24230      unsigned long mask = live_regs_mask & 0xff;
24231      int next_hi_reg;
24232
24233      /* The available low registers depend on the size of the value we are
24234         returning.  */
24235      if (size <= 12)
24236	mask |=  1 << 3;
24237      if (size <= 8)
24238	mask |= 1 << 2;
24239
24240      if (mask == 0)
24241	/* Oh dear!  We have no low registers into which we can pop
24242           high registers!  */
24243	internal_error
24244	  ("no low registers available for popping high registers");
24245
24246      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24247	if (live_regs_mask & (1 << next_hi_reg))
24248	  break;
24249
24250      while (high_regs_pushed)
24251	{
24252	  /* Find lo register(s) into which the high register(s) can
24253             be popped.  */
24254	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24255	    {
24256	      if (mask & (1 << regno))
24257		high_regs_pushed--;
24258	      if (high_regs_pushed == 0)
24259		break;
24260	    }
24261
24262	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
24263
24264	  /* Pop the values into the low register(s).  */
24265	  thumb_pop (asm_out_file, mask);
24266
24267	  /* Move the value(s) into the high registers.  */
24268	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24269	    {
24270	      if (mask & (1 << regno))
24271		{
24272		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24273			       regno);
24274
24275		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24276		    if (live_regs_mask & (1 << next_hi_reg))
24277		      break;
24278		}
24279	    }
24280	}
24281      live_regs_mask &= ~0x0f00;
24282    }
24283
24284  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24285  live_regs_mask &= 0xff;
24286
24287  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24288    {
24289      /* Pop the return address into the PC.  */
24290      if (had_to_push_lr)
24291	live_regs_mask |= 1 << PC_REGNUM;
24292
24293      /* Either no argument registers were pushed or a backtrace
24294	 structure was created which includes an adjusted stack
24295	 pointer, so just pop everything.  */
24296      if (live_regs_mask)
24297	thumb_pop (asm_out_file, live_regs_mask);
24298
      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
24303      if (!had_to_push_lr)
24304	thumb_exit (asm_out_file, LR_REGNUM);
24305    }
24306  else
24307    {
24308      /* Pop everything but the return address.  */
24309      if (live_regs_mask)
24310	thumb_pop (asm_out_file, live_regs_mask);
24311
24312      if (had_to_push_lr)
24313	{
24314	  if (size > 12)
24315	    {
24316	      /* We have no free low regs, so save one.  */
24317	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24318			   LAST_ARG_REGNUM);
24319	    }
24320
24321	  /* Get the return address into a temporary register.  */
24322	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24323
24324	  if (size > 12)
24325	    {
24326	      /* Move the return address to lr.  */
24327	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24328			   LAST_ARG_REGNUM);
24329	      /* Restore the low register.  */
24330	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24331			   IP_REGNUM);
24332	      regno = LR_REGNUM;
24333	    }
24334	  else
24335	    regno = LAST_ARG_REGNUM;
24336	}
24337      else
24338	regno = LR_REGNUM;
24339
24340      /* Remove the argument registers that were pushed onto the stack.  */
24341      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24342		   SP_REGNUM, SP_REGNUM,
24343		   crtl->args.pretend_args_size);
24344
24345      thumb_exit (asm_out_file, regno);
24346    }
24347
24348  return "";
24349}
24350
24351/* Functions to save and restore machine-specific function data.  */
24352static struct machine_function *
24353arm_init_machine_status (void)
24354{
24355  struct machine_function *machine;
24356  machine = ggc_cleared_alloc<machine_function> ();
24357
24358#if ARM_FT_UNKNOWN != 0
24359  machine->func_type = ARM_FT_UNKNOWN;
24360#endif
24361  return machine;
24362}
24363
24364/* Return an RTX indicating where the return address to the
24365   calling function can be found.  */
24366rtx
24367arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24368{
24369  if (count != 0)
24370    return NULL_RTX;
24371
24372  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24373}
24374
24375/* Do anything needed before RTL is emitted for each function.  */
24376void
24377arm_init_expanders (void)
24378{
24379  /* Arrange to initialize and mark the machine per-function status.  */
24380  init_machine_status = arm_init_machine_status;
24381
24382  /* This is to stop the combine pass optimizing away the alignment
24383     adjustment of va_arg.  */
24384  /* ??? It is claimed that this should not be necessary.  */
24385  if (cfun)
24386    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24387}
24388
24389
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
24394
24395HOST_WIDE_INT
24396thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24397{
24398  arm_stack_offsets *offsets;
24399
24400  offsets = arm_get_frame_offsets ();
24401
24402  switch (from)
24403    {
24404    case ARG_POINTER_REGNUM:
24405      switch (to)
24406	{
24407	case STACK_POINTER_REGNUM:
24408	  return offsets->outgoing_args - offsets->saved_args;
24409
24410	case FRAME_POINTER_REGNUM:
24411	  return offsets->soft_frame - offsets->saved_args;
24412
24413	case ARM_HARD_FRAME_POINTER_REGNUM:
24414	  return offsets->saved_regs - offsets->saved_args;
24415
24416	case THUMB_HARD_FRAME_POINTER_REGNUM:
24417	  return offsets->locals_base - offsets->saved_args;
24418
24419	default:
24420	  gcc_unreachable ();
24421	}
24422      break;
24423
24424    case FRAME_POINTER_REGNUM:
24425      switch (to)
24426	{
24427	case STACK_POINTER_REGNUM:
24428	  return offsets->outgoing_args - offsets->soft_frame;
24429
24430	case ARM_HARD_FRAME_POINTER_REGNUM:
24431	  return offsets->saved_regs - offsets->soft_frame;
24432
24433	case THUMB_HARD_FRAME_POINTER_REGNUM:
24434	  return offsets->locals_base - offsets->soft_frame;
24435
24436	default:
24437	  gcc_unreachable ();
24438	}
24439      break;
24440
24441    default:
24442      gcc_unreachable ();
24443    }
24444}
24445
24446/* Generate the function's prologue.  */
24447
24448void
24449thumb1_expand_prologue (void)
24450{
24451  rtx_insn *insn;
24452
24453  HOST_WIDE_INT amount;
24454  arm_stack_offsets *offsets;
24455  unsigned long func_type;
24456  int regno;
24457  unsigned long live_regs_mask;
24458  unsigned long l_mask;
24459  unsigned high_regs_pushed = 0;
24460
24461  func_type = arm_current_func_type ();
24462
24463  /* Naked functions don't have prologues.  */
24464  if (IS_NAKED (func_type))
24465    {
24466      if (flag_stack_usage_info)
24467	current_function_static_stack_size = 0;
24468      return;
24469    }
24470
24471  if (IS_INTERRUPT (func_type))
24472    {
      error ("interrupt service routines cannot be coded in Thumb mode");
24474      return;
24475    }
24476
24477  if (is_called_in_ARM_mode (current_function_decl))
24478    emit_insn (gen_prologue_thumb1_interwork ());
24479
24480  offsets = arm_get_frame_offsets ();
24481  live_regs_mask = offsets->saved_regs_mask;
24482
24483  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
24484  l_mask = live_regs_mask & 0x40ff;
24485  /* Then count how many other high registers will need to be pushed.  */
24486  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24487
24488  if (crtl->args.pretend_args_size)
24489    {
24490      rtx x = GEN_INT (-crtl->args.pretend_args_size);
24491
24492      if (cfun->machine->uses_anonymous_args)
24493	{
24494	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24495	  unsigned long mask;
24496
24497	  mask = 1ul << (LAST_ARG_REGNUM + 1);
24498	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
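	  /* For example, with 8 bytes of pretend args num_pushes is 2, so
	     mask becomes (1 << 4) - (1 << 2) == 0x0c, i.e. {r2, r3}.  */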
24499
24500	  insn = thumb1_emit_multi_reg_push (mask, 0);
24501	}
24502      else
24503	{
24504	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24505					stack_pointer_rtx, x));
24506	}
24507      RTX_FRAME_RELATED_P (insn) = 1;
24508    }
24509
24510  if (TARGET_BACKTRACE)
24511    {
24512      HOST_WIDE_INT offset = 0;
24513      unsigned work_register;
24514      rtx work_reg, x, arm_hfp_rtx;
24515
24516      /* We have been asked to create a stack backtrace structure.
24517         The code looks like this:
24518
24519	 0   .align 2
24520	 0   func:
24521         0     sub   SP, #16         Reserve space for 4 registers.
24522	 2     push  {R7}            Push low registers.
24523         4     add   R7, SP, #20     Get the stack pointer before the push.
24524         6     str   R7, [SP, #8]    Store the stack pointer
24525					(before reserving the space).
24526         8     mov   R7, PC          Get hold of the start of this code + 12.
24527        10     str   R7, [SP, #16]   Store it.
24528        12     mov   R7, FP          Get hold of the current frame pointer.
24529        14     str   R7, [SP, #4]    Store it.
24530        16     mov   R7, LR          Get hold of the current return address.
24531        18     str   R7, [SP, #12]   Store it.
24532        20     add   R7, SP, #16     Point at the start of the
24533					backtrace structure.
24534        22     mov   FP, R7          Put this value into the frame pointer.  */
24535
24536      work_register = thumb_find_work_register (live_regs_mask);
24537      work_reg = gen_rtx_REG (SImode, work_register);
24538      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24539
24540      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24541				    stack_pointer_rtx, GEN_INT (-16)));
24542      RTX_FRAME_RELATED_P (insn) = 1;
24543
24544      if (l_mask)
24545	{
24546	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24547	  RTX_FRAME_RELATED_P (insn) = 1;
24548
24549	  offset = bit_count (l_mask) * UNITS_PER_WORD;
24550	}
24551
24552      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24553      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24554
24555      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24556      x = gen_frame_mem (SImode, x);
24557      emit_move_insn (x, work_reg);
24558
24559      /* Make sure that the instruction fetching the PC is in the right place
24560	 to calculate "start of backtrace creation code + 12".  */
24561      /* ??? The stores using the common WORK_REG ought to be enough to
24562	 prevent the scheduler from doing anything weird.  Failing that
24563	 we could always move all of the following into an UNSPEC_VOLATILE.  */
24564      if (l_mask)
24565	{
24566	  x = gen_rtx_REG (SImode, PC_REGNUM);
24567	  emit_move_insn (work_reg, x);
24568
24569	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24570	  x = gen_frame_mem (SImode, x);
24571	  emit_move_insn (x, work_reg);
24572
24573	  emit_move_insn (work_reg, arm_hfp_rtx);
24574
24575	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
24576	  x = gen_frame_mem (SImode, x);
24577	  emit_move_insn (x, work_reg);
24578	}
24579      else
24580	{
24581	  emit_move_insn (work_reg, arm_hfp_rtx);
24582
24583	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
24584	  x = gen_frame_mem (SImode, x);
24585	  emit_move_insn (x, work_reg);
24586
24587	  x = gen_rtx_REG (SImode, PC_REGNUM);
24588	  emit_move_insn (work_reg, x);
24589
24590	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24591	  x = gen_frame_mem (SImode, x);
24592	  emit_move_insn (x, work_reg);
24593	}
24594
24595      x = gen_rtx_REG (SImode, LR_REGNUM);
24596      emit_move_insn (work_reg, x);
24597
24598      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24599      x = gen_frame_mem (SImode, x);
24600      emit_move_insn (x, work_reg);
24601
24602      x = GEN_INT (offset + 12);
24603      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24604
24605      emit_move_insn (arm_hfp_rtx, work_reg);
24606    }
24607  /* Optimization:  If we are not pushing any low registers but we are going
24608     to push some high registers then delay our first push.  This will just
24609     be a push of LR and we can combine it with the push of the first high
24610     register.  */
24611  else if ((l_mask & 0xff) != 0
24612	   || (high_regs_pushed == 0 && l_mask))
24613    {
24614      unsigned long mask = l_mask;
24615      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24616      insn = thumb1_emit_multi_reg_push (mask, mask);
24617      RTX_FRAME_RELATED_P (insn) = 1;
24618    }
24619
24620  if (high_regs_pushed)
24621    {
24622      unsigned pushable_regs;
24623      unsigned next_hi_reg;
24624      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24625						 : crtl->args.info.nregs;
24626      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24627
24628      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24629	if (live_regs_mask & (1 << next_hi_reg))
24630	  break;
24631
      /* Here we need to mask out registers used for passing arguments, even
	 if they could otherwise be pushed.  Otherwise, using them to stash
	 the high registers could clobber argument values that are still
	 live.  */
24635      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24636
24637      if (pushable_regs == 0)
24638	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24639
24640      while (high_regs_pushed > 0)
24641	{
24642	  unsigned long real_regs_mask = 0;
24643
24644	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24645	    {
24646	      if (pushable_regs & (1 << regno))
24647		{
24648		  emit_move_insn (gen_rtx_REG (SImode, regno),
24649				  gen_rtx_REG (SImode, next_hi_reg));
24650
24651		  high_regs_pushed --;
24652		  real_regs_mask |= (1 << next_hi_reg);
24653
24654		  if (high_regs_pushed)
24655		    {
24656		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24657			   next_hi_reg --)
24658			if (live_regs_mask & (1 << next_hi_reg))
24659			  break;
24660		    }
24661		  else
24662		    {
24663		      pushable_regs &= ~((1 << regno) - 1);
24664		      break;
24665		    }
24666		}
24667	    }
24668
24669	  /* If we had to find a work register and we have not yet
24670	     saved the LR then add it to the list of regs to push.  */
24671	  if (l_mask == (1 << LR_REGNUM))
24672	    {
24673	      pushable_regs |= l_mask;
24674	      real_regs_mask |= l_mask;
24675	      l_mask = 0;
24676	    }
24677
24678	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24679	  RTX_FRAME_RELATED_P (insn) = 1;
24680	}
24681    }
24682
24683  /* Load the pic register before setting the frame pointer,
24684     so we can use r7 as a temporary work register.  */
24685  if (flag_pic && arm_pic_register != INVALID_REGNUM)
24686    arm_load_pic_register (live_regs_mask);
24687
24688  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24689    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24690		    stack_pointer_rtx);
24691
24692  if (flag_stack_usage_info)
24693    current_function_static_stack_size
24694      = offsets->outgoing_args - offsets->saved_args;
24695
24696  amount = offsets->outgoing_args - offsets->saved_regs;
24697  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24698  if (amount)
24699    {
24700      if (amount < 512)
24701	{
24702	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24703					GEN_INT (- amount)));
24704	  RTX_FRAME_RELATED_P (insn) = 1;
24705	}
24706      else
24707	{
24708	  rtx reg, dwarf;
24709
24710	  /* The stack decrement is too big for an immediate value in a single
24711	     insn.  In theory we could issue multiple subtracts, but after
24712	     three of them it becomes more space efficient to place the full
24713	     value in the constant pool and load into a register.  (Also the
24714	     ARM debugger really likes to see only one stack decrement per
24715	     function).  So instead we look for a scratch register into which
24716	     we can load the decrement, and then we subtract this from the
24717	     stack pointer.  Unfortunately on the thumb the only available
24718	     scratch registers are the argument registers, and we cannot use
24719	     these as they may hold arguments to the function.  Instead we
24720	     attempt to locate a call preserved register which is used by this
24721	     function.  If we can find one, then we know that it will have
24722	     been pushed at the start of the prologue and so we can corrupt
24723	     it now.  */
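	  /* For a 1024-byte adjustment with r4 live, for instance, this ends
	     up emitting something along the lines of
		ldr	r4, =-1024
		add	sp, r4
	     (the constant coming from the literal pool); the caller's value
	     of r4 was already pushed in the prologue and is restored by the
	     epilogue as usual.  */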
24724	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24725	    if (live_regs_mask & (1 << regno))
24726	      break;
24727
	  gcc_assert (regno <= LAST_LO_REGNUM);
24729
24730	  reg = gen_rtx_REG (SImode, regno);
24731
24732	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24733
24734	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24735					stack_pointer_rtx, reg));
24736
24737	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24738			       plus_constant (Pmode, stack_pointer_rtx,
24739					      -amount));
24740	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24741	  RTX_FRAME_RELATED_P (insn) = 1;
24742	}
24743    }
24744
24745  if (frame_pointer_needed)
24746    thumb_set_frame_pointer (offsets);
24747
24748  /* If we are profiling, make sure no instructions are scheduled before
24749     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
24751     using the EABI unwinder, to prevent faulting instructions from being
24752     swapped with a stack adjustment.  */
24753  if (crtl->profile || !TARGET_SCHED_PROLOG
24754      || (arm_except_unwind_info (&global_options) == UI_TARGET
24755	  && cfun->can_throw_non_call_exceptions))
24756    emit_insn (gen_blockage ());
24757
24758  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24759  if (live_regs_mask & 0xff)
24760    cfun->machine->lr_save_eliminated = 0;
24761}
24762
/* Generate the pattern *pop_multiple_with_stack_update_and_return if a
   single POP instruction can be generated.  LR should be replaced by PC.
   All the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be popped.  */
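/* With only LR saved, the single-register case below loads the return
   address straight into the PC with one post-incrementing load from the
   stack; otherwise a single "pop {..., pc}" style multi-register pop is
   emitted.  */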
24768void
24769thumb2_expand_return (bool simple_return)
24770{
24771  int i, num_regs;
24772  unsigned long saved_regs_mask;
24773  arm_stack_offsets *offsets;
24774
24775  offsets = arm_get_frame_offsets ();
24776  saved_regs_mask = offsets->saved_regs_mask;
24777
24778  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24779    if (saved_regs_mask & (1 << i))
24780      num_regs++;
24781
24782  if (!simple_return && saved_regs_mask)
24783    {
24784      if (num_regs == 1)
24785        {
24786          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24787          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24788          rtx addr = gen_rtx_MEM (SImode,
24789                                  gen_rtx_POST_INC (SImode,
24790                                                    stack_pointer_rtx));
24791          set_mem_alias_set (addr, get_frame_alias_set ());
24792          XVECEXP (par, 0, 0) = ret_rtx;
24793          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24794          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24795          emit_jump_insn (par);
24796        }
24797      else
24798        {
24799          saved_regs_mask &= ~ (1 << LR_REGNUM);
24800          saved_regs_mask |=   (1 << PC_REGNUM);
24801          arm_emit_multi_reg_pop (saved_regs_mask);
24802        }
24803    }
24804  else
24805    {
24806      emit_jump_insn (simple_return_rtx);
24807    }
24808}
24809
24810void
24811thumb1_expand_epilogue (void)
24812{
24813  HOST_WIDE_INT amount;
24814  arm_stack_offsets *offsets;
24815  int regno;
24816
  /* Naked functions don't have epilogues.  */
24818  if (IS_NAKED (arm_current_func_type ()))
24819    return;
24820
24821  offsets = arm_get_frame_offsets ();
24822  amount = offsets->outgoing_args - offsets->saved_regs;
24823
24824  if (frame_pointer_needed)
24825    {
24826      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24827      amount = offsets->locals_base - offsets->saved_regs;
24828    }
24829  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24830
24831  gcc_assert (amount >= 0);
24832  if (amount)
24833    {
24834      emit_insn (gen_blockage ());
24835
24836      if (amount < 512)
24837	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24838			       GEN_INT (amount)));
24839      else
24840	{
24841	  /* r3 is always free in the epilogue.  */
24842	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24843
24844	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
24845	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24846	}
24847    }
24848
24849  /* Emit a USE (stack_pointer_rtx), so that
24850     the stack adjustment will not be deleted.  */
24851  emit_insn (gen_force_register_use (stack_pointer_rtx));
24852
24853  if (crtl->profile || !TARGET_SCHED_PROLOG)
24854    emit_insn (gen_blockage ());
24855
24856  /* Emit a clobber for each insn that will be restored in the epilogue,
24857     so that flow2 will get register lifetimes correct.  */
24858  for (regno = 0; regno < 13; regno++)
24859    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24860      emit_clobber (gen_rtx_REG (SImode, regno));
24861
24862  if (! df_regs_ever_live_p (LR_REGNUM))
24863    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24864}
24865
24866/* Epilogue code for APCS frame.  */
24867static void
24868arm_expand_epilogue_apcs_frame (bool really_return)
24869{
24870  unsigned long func_type;
24871  unsigned long saved_regs_mask;
24872  int num_regs = 0;
24873  int i;
24874  int floats_from_frame = 0;
24875  arm_stack_offsets *offsets;
24876
24877  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24878  func_type = arm_current_func_type ();
24879
24880  /* Get frame offsets for ARM.  */
24881  offsets = arm_get_frame_offsets ();
24882  saved_regs_mask = offsets->saved_regs_mask;
24883
24884  /* Find the offset of the floating-point save area in the frame.  */
24885  floats_from_frame
24886    = (offsets->saved_args
24887       + arm_compute_static_chain_stack_bytes ()
24888       - offsets->frame);
24889
24890  /* Compute how many core registers saved and how far away the floats are.  */
24891  for (i = 0; i <= LAST_ARM_REGNUM; i++)
24892    if (saved_regs_mask & (1 << i))
24893      {
24894        num_regs++;
24895        floats_from_frame += 4;
24896      }
24897
24898  if (TARGET_HARD_FLOAT && TARGET_VFP)
24899    {
24900      int start_reg;
24901      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24902
24903      /* The offset is from IP_REGNUM.  */
24904      int saved_size = arm_get_vfp_saved_size ();
24905      if (saved_size > 0)
24906        {
24907	  rtx_insn *insn;
24908          floats_from_frame += saved_size;
24909          insn = emit_insn (gen_addsi3 (ip_rtx,
24910					hard_frame_pointer_rtx,
24911					GEN_INT (-floats_from_frame)));
24912	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24913				       ip_rtx, hard_frame_pointer_rtx);
24914        }
24915
24916      /* Generate VFP register multi-pop.  */
24917      start_reg = FIRST_VFP_REGNUM;
24918
24919      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24920        /* Look for a case where a reg does not need restoring.  */
24921        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24922            && (!df_regs_ever_live_p (i + 1)
24923                || call_used_regs[i + 1]))
24924          {
24925            if (start_reg != i)
24926              arm_emit_vfp_multi_reg_pop (start_reg,
24927                                          (i - start_reg) / 2,
24928                                          gen_rtx_REG (SImode,
24929                                                       IP_REGNUM));
24930            start_reg = i + 2;
24931          }
24932
24933      /* Restore the remaining regs that we have discovered (or possibly
24934         even all of them, if the conditional in the for loop never
24935         fired).  */
24936      if (start_reg != i)
24937        arm_emit_vfp_multi_reg_pop (start_reg,
24938                                    (i - start_reg) / 2,
24939                                    gen_rtx_REG (SImode, IP_REGNUM));
24940    }
24941
24942  if (TARGET_IWMMXT)
24943    {
24944      /* The frame pointer is guaranteed to be non-double-word aligned, as
24945         it is set to double-word-aligned old_stack_pointer - 4.  */
24946      rtx_insn *insn;
24947      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24948
24949      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24950        if (df_regs_ever_live_p (i) && !call_used_regs[i])
24951          {
24952            rtx addr = gen_frame_mem (V2SImode,
24953                                 plus_constant (Pmode, hard_frame_pointer_rtx,
24954                                                - lrm_count * 4));
24955            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24956            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24957                                               gen_rtx_REG (V2SImode, i),
24958                                               NULL_RTX);
24959            lrm_count += 2;
24960          }
24961    }
24962
  /* saved_regs_mask should contain IP, which holds the old stack pointer
     from the time the activation record was created.  Since SP and IP are
     adjacent registers, we can restore the value directly into SP.  */
24966  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24967  saved_regs_mask &= ~(1 << IP_REGNUM);
24968  saved_regs_mask |= (1 << SP_REGNUM);
24969
24970  /* There are two registers left in saved_regs_mask - LR and PC.  We
24971     only need to restore LR (the return address), but to
24972     save time we can load it directly into PC, unless we need a
24973     special function exit sequence, or we are not really returning.  */
24974  if (really_return
24975      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24976      && !crtl->calls_eh_return)
24977    /* Delete LR from the register mask, so that LR on
24978       the stack is loaded into the PC in the register mask.  */
24979    saved_regs_mask &= ~(1 << LR_REGNUM);
24980  else
24981    saved_regs_mask &= ~(1 << PC_REGNUM);
24982
24983  num_regs = bit_count (saved_regs_mask);
24984  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24985    {
24986      rtx_insn *insn;
24987      emit_insn (gen_blockage ());
24988      /* Unwind the stack to just below the saved registers.  */
24989      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24990				    hard_frame_pointer_rtx,
24991				    GEN_INT (- 4 * num_regs)));
24992
24993      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24994				   stack_pointer_rtx, hard_frame_pointer_rtx);
24995    }
24996
24997  arm_emit_multi_reg_pop (saved_regs_mask);
24998
24999  if (IS_INTERRUPT (func_type))
25000    {
25001      /* Interrupt handlers will have pushed the
25002         IP onto the stack, so restore it now.  */
25003      rtx_insn *insn;
25004      rtx addr = gen_rtx_MEM (SImode,
25005                              gen_rtx_POST_INC (SImode,
25006                              stack_pointer_rtx));
25007      set_mem_alias_set (addr, get_frame_alias_set ());
25008      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25009      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25010                                         gen_rtx_REG (SImode, IP_REGNUM),
25011                                         NULL_RTX);
25012    }
25013
25014  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25015    return;
25016
25017  if (crtl->calls_eh_return)
25018    emit_insn (gen_addsi3 (stack_pointer_rtx,
25019			   stack_pointer_rtx,
25020			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25021
25022  if (IS_STACKALIGN (func_type))
25023    /* Restore the original stack pointer.  Before prologue, the stack was
25024       realigned and the original stack pointer saved in r0.  For details,
25025       see comment in arm_expand_prologue.  */
25026    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25027
25028  emit_jump_insn (simple_return_rtx);
25029}
25030
/* Generate RTL to represent the ARM epilogue.  REALLY_RETURN is true if the
   function is not a sibcall.  */
25033void
25034arm_expand_epilogue (bool really_return)
25035{
25036  unsigned long func_type;
25037  unsigned long saved_regs_mask;
25038  int num_regs = 0;
25039  int i;
25040  int amount;
25041  arm_stack_offsets *offsets;
25042
25043  func_type = arm_current_func_type ();
25044
  /* Naked functions don't have epilogues.  Hence, generate a return pattern
     and let output_return_instruction take care of any instruction
     emission.  */
25047  if (IS_NAKED (func_type)
25048      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25049    {
25050      if (really_return)
25051        emit_jump_insn (simple_return_rtx);
25052      return;
25053    }
25054
25055  /* If we are throwing an exception, then we really must be doing a
25056     return, so we can't tail-call.  */
25057  gcc_assert (!crtl->calls_eh_return || really_return);
25058
25059  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25060    {
25061      arm_expand_epilogue_apcs_frame (really_return);
25062      return;
25063    }
25064
25065  /* Get frame offsets for ARM.  */
25066  offsets = arm_get_frame_offsets ();
25067  saved_regs_mask = offsets->saved_regs_mask;
25068  num_regs = bit_count (saved_regs_mask);
25069
25070  if (frame_pointer_needed)
25071    {
25072      rtx_insn *insn;
25073      /* Restore stack pointer if necessary.  */
25074      if (TARGET_ARM)
25075        {
25076          /* In ARM mode, frame pointer points to first saved register.
25077             Restore stack pointer to last saved register.  */
25078          amount = offsets->frame - offsets->saved_regs;
25079
25080          /* Force out any pending memory operations that reference stacked data
25081             before stack de-allocation occurs.  */
25082          emit_insn (gen_blockage ());
25083	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25084			    hard_frame_pointer_rtx,
25085			    GEN_INT (amount)));
25086	  arm_add_cfa_adjust_cfa_note (insn, amount,
25087				       stack_pointer_rtx,
25088				       hard_frame_pointer_rtx);
25089
25090          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25091             deleted.  */
25092          emit_insn (gen_force_register_use (stack_pointer_rtx));
25093        }
25094      else
25095        {
25096          /* In Thumb-2 mode, the frame pointer points to the last saved
25097             register.  */
25098	  amount = offsets->locals_base - offsets->saved_regs;
25099	  if (amount)
25100	    {
25101	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25102				hard_frame_pointer_rtx,
25103				GEN_INT (amount)));
25104	      arm_add_cfa_adjust_cfa_note (insn, amount,
25105					   hard_frame_pointer_rtx,
25106					   hard_frame_pointer_rtx);
25107	    }
25108
25109          /* Force out any pending memory operations that reference stacked data
25110             before stack de-allocation occurs.  */
25111          emit_insn (gen_blockage ());
25112	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
25113				       hard_frame_pointer_rtx));
25114	  arm_add_cfa_adjust_cfa_note (insn, 0,
25115				       stack_pointer_rtx,
25116				       hard_frame_pointer_rtx);
25117          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25118             deleted.  */
25119          emit_insn (gen_force_register_use (stack_pointer_rtx));
25120        }
25121    }
25122  else
25123    {
25124      /* Pop off outgoing args and local frame to adjust stack pointer to
25125         last saved register.  */
25126      amount = offsets->outgoing_args - offsets->saved_regs;
25127      if (amount)
25128        {
25129	  rtx_insn *tmp;
25130          /* Force out any pending memory operations that reference stacked data
25131             before stack de-allocation occurs.  */
25132          emit_insn (gen_blockage ());
25133	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25134				       stack_pointer_rtx,
25135				       GEN_INT (amount)));
25136	  arm_add_cfa_adjust_cfa_note (tmp, amount,
25137				       stack_pointer_rtx, stack_pointer_rtx);
25138          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25139             not deleted.  */
25140          emit_insn (gen_force_register_use (stack_pointer_rtx));
25141        }
25142    }
25143
25144  if (TARGET_HARD_FLOAT && TARGET_VFP)
25145    {
25146      /* Generate VFP register multi-pop.  */
25147      int end_reg = LAST_VFP_REGNUM + 1;
25148
      /* Scan the registers in reverse order.  We need to match any
         groupings made in the prologue and generate matching vldm
         operations.  Groups must be matched because, unlike pop, vldm
         can only restore consecutive registers.  */
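      /* For illustration (d8-d15 being the call-saved VFP registers): if
         the prologue saved d8-d9 and d11-d12 as two separate groups, the
         scan emits one multi-register pop for d11-d12 when it reaches the
         gap at d10 and another for d8-d9 when it reaches the
         call-clobbered d7.  */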
25153      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25154        /* Look for a case where a reg does not need restoring.  */
25155        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25156            && (!df_regs_ever_live_p (i + 1)
25157                || call_used_regs[i + 1]))
25158          {
25159            /* Restore the regs discovered so far (from reg+2 to
25160               end_reg).  */
25161            if (end_reg > i + 2)
25162              arm_emit_vfp_multi_reg_pop (i + 2,
25163                                          (end_reg - (i + 2)) / 2,
25164                                          stack_pointer_rtx);
25165            end_reg = i;
25166          }
25167
25168      /* Restore the remaining regs that we have discovered (or possibly
25169         even all of them, if the conditional in the for loop never
25170         fired).  */
25171      if (end_reg > i + 2)
25172        arm_emit_vfp_multi_reg_pop (i + 2,
25173                                    (end_reg - (i + 2)) / 2,
25174                                    stack_pointer_rtx);
25175    }
25176
25177  if (TARGET_IWMMXT)
25178    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25179      if (df_regs_ever_live_p (i) && !call_used_regs[i])
25180        {
25181          rtx_insn *insn;
25182          rtx addr = gen_rtx_MEM (V2SImode,
25183                                  gen_rtx_POST_INC (SImode,
25184                                                    stack_pointer_rtx));
25185          set_mem_alias_set (addr, get_frame_alias_set ());
25186          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25187          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25188                                             gen_rtx_REG (V2SImode, i),
25189                                             NULL_RTX);
25190	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25191				       stack_pointer_rtx, stack_pointer_rtx);
25192        }
25193
25194  if (saved_regs_mask)
25195    {
25196      rtx insn;
25197      bool return_in_pc = false;
25198
25199      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25200          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25201          && !IS_STACKALIGN (func_type)
25202          && really_return
25203          && crtl->args.pretend_args_size == 0
25204          && saved_regs_mask & (1 << LR_REGNUM)
25205          && !crtl->calls_eh_return)
25206        {
25207          saved_regs_mask &= ~(1 << LR_REGNUM);
25208          saved_regs_mask |= (1 << PC_REGNUM);
25209          return_in_pc = true;
25210        }
25211
25212      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25213        {
25214          for (i = 0; i <= LAST_ARM_REGNUM; i++)
25215            if (saved_regs_mask & (1 << i))
25216              {
25217                rtx addr = gen_rtx_MEM (SImode,
25218                                        gen_rtx_POST_INC (SImode,
25219                                                          stack_pointer_rtx));
25220                set_mem_alias_set (addr, get_frame_alias_set ());
25221
25222                if (i == PC_REGNUM)
25223                  {
25224                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25225                    XVECEXP (insn, 0, 0) = ret_rtx;
25226                    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25227                                                        gen_rtx_REG (SImode, i),
25228                                                        addr);
25229                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25230                    insn = emit_jump_insn (insn);
25231                  }
25232                else
25233                  {
25234                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25235                                                 addr));
25236                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25237                                                       gen_rtx_REG (SImode, i),
25238                                                       NULL_RTX);
25239		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25240						 stack_pointer_rtx,
25241						 stack_pointer_rtx);
25242                  }
25243              }
25244        }
25245      else
25246        {
25247          if (TARGET_LDRD
25248	      && current_tune->prefer_ldrd_strd
25249              && !optimize_function_for_size_p (cfun))
25250            {
25251              if (TARGET_THUMB2)
25252                thumb2_emit_ldrd_pop (saved_regs_mask);
25253              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25254                arm_emit_ldrd_pop (saved_regs_mask);
25255              else
25256                arm_emit_multi_reg_pop (saved_regs_mask);
25257            }
25258          else
25259            arm_emit_multi_reg_pop (saved_regs_mask);
25260        }
25261
25262      if (return_in_pc)
25263        return;
25264    }
25265
25266  if (crtl->args.pretend_args_size)
25267    {
25268      int i, j;
25269      rtx dwarf = NULL_RTX;
25270      rtx_insn *tmp =
25271	emit_insn (gen_addsi3 (stack_pointer_rtx,
25272			       stack_pointer_rtx,
25273			       GEN_INT (crtl->args.pretend_args_size)));
25274
25275      RTX_FRAME_RELATED_P (tmp) = 1;
25276
25277      if (cfun->machine->uses_anonymous_args)
25278	{
	  /* Restore pretend args.  See arm_expand_prologue for how the
	     pretend args are saved on the stack.  */
25281	  int num_regs = crtl->args.pretend_args_size / 4;
25282	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
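	  /* For example, with three anonymous-argument registers pushed
	     (pretend_args_size == 12), num_regs is 3 and the mask becomes
	     (0xf0 >> 3) & 0xf == 0xe, i.e. r1, r2 and r3.  */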
25283	  for (j = 0, i = 0; j < num_regs; i++)
25284	    if (saved_regs_mask & (1 << i))
25285	      {
25286		rtx reg = gen_rtx_REG (SImode, i);
25287		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25288		j++;
25289	      }
25290	  REG_NOTES (tmp) = dwarf;
25291	}
25292      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25293				   stack_pointer_rtx, stack_pointer_rtx);
25294    }
25295
25296  if (!really_return)
25297    return;
25298
25299  if (crtl->calls_eh_return)
25300    emit_insn (gen_addsi3 (stack_pointer_rtx,
25301                           stack_pointer_rtx,
25302                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25303
25304  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  In the prologue, the stack was
       realigned and the original stack pointer was saved in r0.  For details,
       see the comment in arm_expand_prologue.  */
25308    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25309
25310  emit_jump_insn (simple_return_rtx);
25311}
25312
25313/* Implementation of insn prologue_thumb1_interwork.  This is the first
25314   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
25315
25316const char *
25317thumb1_output_interwork (void)
25318{
25319  const char * name;
25320  FILE *f = asm_out_file;
25321
25322  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25323  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25324	      == SYMBOL_REF);
25325  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25326
25327  /* Generate code sequence to switch us into Thumb mode.  */
25328  /* The .code 32 directive has already been emitted by
25329     ASM_DECLARE_FUNCTION_NAME.  */
25330  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25331  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25332
25333  /* Generate a label, so that the debugger will notice the
25334     change in instruction sets.  This label is also used by
25335     the assembler to bypass the ARM code when this function
25336     is called from a Thumb encoded function elsewhere in the
25337     same file.  Hence the definition of STUB_NAME here must
25338     agree with the definition in gas/config/tc-arm.c.  */
25339
25340#define STUB_NAME ".real_start_of"
25341
25342  fprintf (f, "\t.code\t16\n");
25343#ifdef ARM_PE
25344  if (arm_dllexport_name_p (name))
25345    name = arm_strip_name_encoding (name);
25346#endif
25347  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25348  fprintf (f, "\t.thumb_func\n");
25349  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25350
25351  return "";
25352}
25353
25354/* Handle the case of a double word load into a low register from
25355   a computed memory address.  The computed address may involve a
25356   register which is overwritten by the load.  */
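/* For example, for a destination pair r2/r3 and an address computed as
   r4 + r5, the register + register case below emits

	add	r3, r4, r5
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   so the address, held in the high half of the destination, is only
   overwritten by the final load.  */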
25357const char *
25358thumb_load_double_from_address (rtx *operands)
25359{
25360  rtx addr;
25361  rtx base;
25362  rtx offset;
25363  rtx arg1;
25364  rtx arg2;
25365
25366  gcc_assert (REG_P (operands[0]));
25367  gcc_assert (MEM_P (operands[1]));
25368
25369  /* Get the memory address.  */
25370  addr = XEXP (operands[1], 0);
25371
25372  /* Work out how the memory address is computed.  */
25373  switch (GET_CODE (addr))
25374    {
25375    case REG:
25376      operands[2] = adjust_address (operands[1], SImode, 4);
25377
25378      if (REGNO (operands[0]) == REGNO (addr))
25379	{
25380	  output_asm_insn ("ldr\t%H0, %2", operands);
25381	  output_asm_insn ("ldr\t%0, %1", operands);
25382	}
25383      else
25384	{
25385	  output_asm_insn ("ldr\t%0, %1", operands);
25386	  output_asm_insn ("ldr\t%H0, %2", operands);
25387	}
25388      break;
25389
25390    case CONST:
25391      /* Compute <address> + 4 for the high order load.  */
25392      operands[2] = adjust_address (operands[1], SImode, 4);
25393
25394      output_asm_insn ("ldr\t%0, %1", operands);
25395      output_asm_insn ("ldr\t%H0, %2", operands);
25396      break;
25397
25398    case PLUS:
25399      arg1   = XEXP (addr, 0);
25400      arg2   = XEXP (addr, 1);
25401
25402      if (CONSTANT_P (arg1))
25403	base = arg2, offset = arg1;
25404      else
25405	base = arg1, offset = arg2;
25406
25407      gcc_assert (REG_P (base));
25408
      /* Catch the case of <address> = <reg> + <reg>.  */
25410      if (REG_P (offset))
25411	{
25412	  int reg_offset = REGNO (offset);
25413	  int reg_base   = REGNO (base);
25414	  int reg_dest   = REGNO (operands[0]);
25415
25416	  /* Add the base and offset registers together into the
25417             higher destination register.  */
25418	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25419		       reg_dest + 1, reg_base, reg_offset);
25420
25421	  /* Load the lower destination register from the address in
25422             the higher destination register.  */
25423	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25424		       reg_dest, reg_dest + 1);
25425
25426	  /* Load the higher destination register from its own address
25427             plus 4.  */
25428	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25429		       reg_dest + 1, reg_dest + 1);
25430	}
25431      else
25432	{
25433	  /* Compute <address> + 4 for the high order load.  */
25434	  operands[2] = adjust_address (operands[1], SImode, 4);
25435
25436	  /* If the computed address is held in the low order register
25437	     then load the high order register first, otherwise always
25438	     load the low order register first.  */
25439	  if (REGNO (operands[0]) == REGNO (base))
25440	    {
25441	      output_asm_insn ("ldr\t%H0, %2", operands);
25442	      output_asm_insn ("ldr\t%0, %1", operands);
25443	    }
25444	  else
25445	    {
25446	      output_asm_insn ("ldr\t%0, %1", operands);
25447	      output_asm_insn ("ldr\t%H0, %2", operands);
25448	    }
25449	}
25450      break;
25451
25452    case LABEL_REF:
25453      /* With no registers to worry about we can just load the value
25454         directly.  */
25455      operands[2] = adjust_address (operands[1], SImode, 4);
25456
25457      output_asm_insn ("ldr\t%H0, %2", operands);
25458      output_asm_insn ("ldr\t%0, %1", operands);
25459      break;
25460
25461    default:
25462      gcc_unreachable ();
25463    }
25464
25465  return "";
25466}
25467
25468const char *
25469thumb_output_move_mem_multiple (int n, rtx *operands)
25470{
25473  switch (n)
25474    {
25475    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
25482      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25483      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25484      break;
25485
25486    case 3:
25487      if (REGNO (operands[4]) > REGNO (operands[5]))
25488        std::swap (operands[4], operands[5]);
25489      if (REGNO (operands[5]) > REGNO (operands[6]))
25490        std::swap (operands[5], operands[6]);
25491      if (REGNO (operands[4]) > REGNO (operands[5]))
25492        std::swap (operands[4], operands[5]);
25493
25494      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25495      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25496      break;
25497
25498    default:
25499      gcc_unreachable ();
25500    }
25501
25502  return "";
25503}
25504
25505/* Output a call-via instruction for thumb state.  */
25506const char *
25507thumb_call_via_reg (rtx reg)
25508{
25509  int regno = REGNO (reg);
25510  rtx *labelp;
25511
25512  gcc_assert (regno < LR_REGNUM);
25513
25514  /* If we are in the normal text section we can use a single instance
25515     per compilation unit.  If we are doing function sections, then we need
25516     an entry per section, since we can't rely on reachability.  */
25517  if (in_section == text_section)
25518    {
25519      thumb_call_reg_needed = 1;
25520
25521      if (thumb_call_via_label[regno] == NULL)
25522	thumb_call_via_label[regno] = gen_label_rtx ();
25523      labelp = thumb_call_via_label + regno;
25524    }
25525  else
25526    {
25527      if (cfun->machine->call_via[regno] == NULL)
25528	cfun->machine->call_via[regno] = gen_label_rtx ();
25529      labelp = cfun->machine->call_via + regno;
25530    }
25531
25532  output_asm_insn ("bl\t%a0", labelp);
25533  return "";
25534}
25535
25536/* Routines for generating rtl.  */
25537void
25538thumb_expand_movmemqi (rtx *operands)
25539{
25540  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25541  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25542  HOST_WIDE_INT len = INTVAL (operands[2]);
25543  HOST_WIDE_INT offset = 0;
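  /* For example, a 23-byte copy is expanded below as one 12-byte block
     move, one 8-byte block move, then a halfword and a byte copy
     (12 + 8 + 2 + 1).  */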
25544
25545  while (len >= 12)
25546    {
25547      emit_insn (gen_movmem12b (out, in, out, in));
25548      len -= 12;
25549    }
25550
25551  if (len >= 8)
25552    {
25553      emit_insn (gen_movmem8b (out, in, out, in));
25554      len -= 8;
25555    }
25556
25557  if (len >= 4)
25558    {
25559      rtx reg = gen_reg_rtx (SImode);
25560      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25561      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25562      len -= 4;
25563      offset += 4;
25564    }
25565
25566  if (len >= 2)
25567    {
25568      rtx reg = gen_reg_rtx (HImode);
25569      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25570					      plus_constant (Pmode, in,
25571							     offset))));
25572      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25573								offset)),
25574			    reg));
25575      len -= 2;
25576      offset += 2;
25577    }
25578
25579  if (len)
25580    {
25581      rtx reg = gen_reg_rtx (QImode);
25582      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25583					      plus_constant (Pmode, in,
25584							     offset))));
25585      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25586								offset)),
25587			    reg));
25588    }
25589}
25590
25591void
25592thumb_reload_out_hi (rtx *operands)
25593{
25594  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25595}
25596
25597/* Handle reading a half-word from memory during reload.  */
25598void
25599thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25600{
25601  gcc_unreachable ();
25602}
25603
25604/* Return the length of a function name prefix
25605    that starts with the character 'c'.  */
25606static int
25607arm_get_strip_length (int c)
25608{
25609  switch (c)
25610    {
25611    ARM_NAME_ENCODING_LENGTHS
25612      default: return 0;
25613    }
25614}
25615
25616/* Return a pointer to a function's name with any
25617   and all prefix encodings stripped from it.  */
25618const char *
25619arm_strip_name_encoding (const char *name)
25620{
25621  int skip;
25622
25623  while ((skip = arm_get_strip_length (* name)))
25624    name += skip;
25625
25626  return name;
25627}
25628
25629/* If there is a '*' anywhere in the name's prefix, then
25630   emit the stripped name verbatim, otherwise prepend an
25631   underscore if leading underscores are being used.  */
25632void
25633arm_asm_output_labelref (FILE *stream, const char *name)
25634{
25635  int skip;
25636  int verbatim = 0;
25637
25638  while ((skip = arm_get_strip_length (* name)))
25639    {
25640      verbatim |= (*name == '*');
25641      name += skip;
25642    }
25643
25644  if (verbatim)
25645    fputs (name, stream);
25646  else
25647    asm_fprintf (stream, "%U%s", name);
25648}
25649
25650/* This function is used to emit an EABI tag and its associated value.
25651   We emit the numerical value of the tag in case the assembler does not
   support textual tags (e.g. gas prior to 2.20).  If requested we include
25653   the tag name in a comment so that anyone reading the assembler output
25654   will know which tag is being set.
25655
25656   This function is not static because arm-c.c needs it too.  */
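/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2) emits
       .eabi_attribute 26, 2   @ Tag_ABI_enum_size
   where the trailing comment appears only under -fverbose-asm or -dA.  */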
25657
25658void
25659arm_emit_eabi_attribute (const char *name, int num, int val)
25660{
25661  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25662  if (flag_verbose_asm || flag_debug_asm)
25663    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25664  asm_fprintf (asm_out_file, "\n");
25665}
25666
25667/* This function is used to print CPU tuning information as comment
25668   in assembler file.  Pointers are not printed for now.  */
25669
25670void
25671arm_print_tune_info (void)
25672{
25673  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25674  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25675	       current_tune->constant_limit);
25676  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25677	       current_tune->max_insns_skipped);
25678  asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25679	       current_tune->num_prefetch_slots);
25680  asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25681	       current_tune->l1_cache_size);
25682  asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25683	       current_tune->l1_cache_line_size);
25684  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25685	       (int) current_tune->prefer_constant_pool);
25686  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25687  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25688  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25689	       current_tune->branch_cost (false, false));
25690  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25691	       current_tune->branch_cost (false, true));
25692  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25693	       current_tune->branch_cost (true, false));
25694  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25695	       current_tune->branch_cost (true, true));
25696  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25697	       (int) current_tune->prefer_ldrd_strd);
25698  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25699	       (int) current_tune->logical_op_non_short_circuit[0],
25700	       (int) current_tune->logical_op_non_short_circuit[1]);
25701  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25702	       (int) current_tune->prefer_neon_for_64bits);
25703  asm_fprintf (asm_out_file,
25704	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25705	       (int) current_tune->disparage_flag_setting_t16_encodings);
25706  asm_fprintf (asm_out_file,
25707	       "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25708	       (int) current_tune
25709	               ->disparage_partial_flag_setting_t16_encodings);
25710  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25711	       (int) current_tune->string_ops_prefer_neon);
25712  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25713	       current_tune->max_insns_inline_memset);
25714  asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25715	       current_tune->fuseable_ops);
25716  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25717	       (int) current_tune->sched_autopref);
25718}
25719
25720static void
25721arm_file_start (void)
25722{
25723  int val;
25724
25725  if (TARGET_UNIFIED_ASM)
25726    asm_fprintf (asm_out_file, "\t.syntax unified\n");
25727
25728  if (TARGET_BPABI)
25729    {
25730      const char *fpu_name;
25731      if (arm_selected_arch)
25732        {
25733	  /* armv7ve doesn't support any extensions.  */
25734	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25735	    {
	      /* Keep backward compatibility for assemblers
25737		 which don't support armv7ve.  */
25738	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25739	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25740	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25741	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25742	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25743	    }
25744	  else
25745	    {
25746	      const char* pos = strchr (arm_selected_arch->name, '+');
25747	      if (pos)
25748		{
25749		  char buf[15];
25750		  gcc_assert (strlen (arm_selected_arch->name)
25751			      <= sizeof (buf) / sizeof (*pos));
25752		  strncpy (buf, arm_selected_arch->name,
25753				(pos - arm_selected_arch->name) * sizeof (*pos));
25754		  buf[pos - arm_selected_arch->name] = '\0';
25755		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25756		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25757		}
25758	      else
25759		asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25760	    }
25761        }
25762      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25763	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25764      else
25765	{
25766	  const char* truncated_name
25767	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25768	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25769	}
25770
25771      if (print_tune_info)
25772	arm_print_tune_info ();
25773
25774      if (TARGET_SOFT_FLOAT)
25775	{
25776	  fpu_name = "softvfp";
25777	}
25778      else
25779	{
25780	  fpu_name = arm_fpu_desc->name;
25781	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25782	    {
25783	      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25784		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25785
25786	      if (TARGET_HARD_FLOAT_ABI)
25787		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25788	    }
25789	}
25790      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25791
25792      /* Some of these attributes only apply when the corresponding features
25793         are used.  However we don't have any easy way of figuring this out.
25794	 Conservatively record the setting that would have been used.  */
25795
25796      if (flag_rounding_math)
25797	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25798
25799      if (!flag_unsafe_math_optimizations)
25800	{
25801	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25802	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25803	}
25804      if (flag_signaling_nans)
25805	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25806
25807      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25808			   flag_finite_math_only ? 1 : 3);
25809
25810      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25811      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25812      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25813			       flag_short_enums ? 1 : 2);
25814
25815      /* Tag_ABI_optimization_goals.  */
25816      if (optimize_size)
25817	val = 4;
25818      else if (optimize >= 2)
25819	val = 2;
25820      else if (optimize)
25821	val = 1;
25822      else
25823	val = 6;
25824      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25825
25826      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25827			       unaligned_access);
25828
25829      if (arm_fp16_format)
25830	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25831			     (int) arm_fp16_format);
25832
25833      if (arm_lang_output_object_attributes_hook)
25834	arm_lang_output_object_attributes_hook();
25835    }
25836
25837  default_file_start ();
25838}
25839
25840static void
25841arm_file_end (void)
25842{
25843  int regno;
25844
25845  if (NEED_INDICATE_EXEC_STACK)
25846    /* Add .note.GNU-stack.  */
25847    file_end_indicate_exec_stack ();
25848
25849  if (! thumb_call_reg_needed)
25850    return;
25851
25852  switch_to_section (text_section);
25853  asm_fprintf (asm_out_file, "\t.code 16\n");
25854  ASM_OUTPUT_ALIGN (asm_out_file, 1);
25855
25856  for (regno = 0; regno < LR_REGNUM; regno++)
25857    {
25858      rtx label = thumb_call_via_label[regno];
25859
25860      if (label != 0)
25861	{
25862	  targetm.asm_out.internal_label (asm_out_file, "L",
25863					  CODE_LABEL_NUMBER (label));
25864	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25865	}
25866    }
25867}
25868
25869#ifndef ARM_PE
25870/* Symbols in the text segment can be accessed without indirecting via the
25871   constant pool; it may take an extra binary operation, but this is still
25872   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
25874   simplification.  */
25875
25876static void
25877arm_encode_section_info (tree decl, rtx rtl, int first)
25878{
25879  if (optimize > 0 && TREE_CONSTANT (decl))
25880    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25881
25882  default_encode_section_info (decl, rtl, first);
25883}
25884#endif /* !ARM_PE */
25885
25886static void
25887arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25888{
25889  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25890      && !strcmp (prefix, "L"))
25891    {
25892      arm_ccfsm_state = 0;
25893      arm_target_insn = NULL;
25894    }
25895  default_internal_label (stream, prefix, labelno);
25896}
25897
25898/* Output code to add DELTA to the first argument, and then jump
25899   to FUNCTION.  Used for C++ multiple inheritance.  */
25900static void
25901arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25902		     HOST_WIDE_INT delta,
25903		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25904		     tree function)
25905{
25906  static int thunk_label = 0;
25907  char label[256];
25908  char labelpc[256];
25909  int mi_delta = delta;
25910  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25911  int shift = 0;
25912  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25913                    ? 1 : 0);
25914  if (mi_delta < 0)
25915    mi_delta = - mi_delta;
25916
25917  final_start_function (emit_barrier (), file, 1);
25918
25919  if (TARGET_THUMB1)
25920    {
25921      int labelno = thunk_label++;
25922      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
25924      if (TARGET_THUMB1_ONLY)
25925	{
25926	  /* push r3 so we can use it as a temporary.  */
25927	  /* TODO: Omit this save if r3 is not used.  */
25928	  fputs ("\tpush {r3}\n", file);
25929	  fputs ("\tldr\tr3, ", file);
25930	}
25931      else
25932	{
25933	  fputs ("\tldr\tr12, ", file);
25934	}
25935      assemble_name (file, label);
25936      fputc ('\n', file);
25937      if (flag_pic)
25938	{
25939	  /* If we are generating PIC, the ldr instruction below loads
25940	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
25941	     the address of the add + 8, so we have:
25942
25943	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25944	         = target + 1.
25945
25946	     Note that we have "+ 1" because some versions of GNU ld
25947	     don't set the low bit of the result for R_ARM_REL32
25948	     relocations against thumb function symbols.
25949	     On ARMv6M this is +4, not +8.  */
25950	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25951	  assemble_name (file, labelpc);
25952	  fputs (":\n", file);
25953	  if (TARGET_THUMB1_ONLY)
25954	    {
25955	      /* This is 2 insns after the start of the thunk, so we know it
25956	         is 4-byte aligned.  */
25957	      fputs ("\tadd\tr3, pc, r3\n", file);
25958	      fputs ("\tmov r12, r3\n", file);
25959	    }
25960	  else
25961	    fputs ("\tadd\tr12, pc, r12\n", file);
25962	}
25963      else if (TARGET_THUMB1_ONLY)
25964	fputs ("\tmov r12, r3\n", file);
25965    }
25966  if (TARGET_THUMB1_ONLY)
25967    {
25968      if (mi_delta > 255)
25969	{
25970	  fputs ("\tldr\tr3, ", file);
25971	  assemble_name (file, label);
25972	  fputs ("+4\n", file);
25973	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25974		       mi_op, this_regno, this_regno);
25975	}
25976      else if (mi_delta != 0)
25977	{
	  /* Thumb-1 unified syntax requires an 's' suffix on the instruction
	     name when one of the operands is an immediate.  */
25980	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25981		       mi_op, this_regno, this_regno,
25982		       mi_delta);
25983	}
25984    }
25985  else
25986    {
25987      /* TODO: Use movw/movt for large constants when available.  */
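      /* For illustration, a delta of 0x1234 (with the `this' pointer in r0)
	 is applied below as
		add	r0, r0, #564
		add	r0, r0, #4096
	 i.e. 0x234 then 0x1000; each chunk is an 8-bit field at an even bit
	 position and hence a valid ARM immediate.  */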
25988      while (mi_delta != 0)
25989	{
25990	  if ((mi_delta & (3 << shift)) == 0)
25991	    shift += 2;
25992	  else
25993	    {
25994	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25995			   mi_op, this_regno, this_regno,
25996			   mi_delta & (0xff << shift));
25997	      mi_delta &= ~(0xff << shift);
25998	      shift += 8;
25999	    }
26000	}
26001    }
26002  if (TARGET_THUMB1)
26003    {
26004      if (TARGET_THUMB1_ONLY)
26005	fputs ("\tpop\t{r3}\n", file);
26006
26007      fprintf (file, "\tbx\tr12\n");
26008      ASM_OUTPUT_ALIGN (file, 2);
26009      assemble_name (file, label);
26010      fputs (":\n", file);
26011      if (flag_pic)
26012	{
26013	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
26014	  rtx tem = XEXP (DECL_RTL (function), 0);
26015	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26016	     pipeline offset is four rather than eight.  Adjust the offset
26017	     accordingly.  */
26018	  tem = plus_constant (GET_MODE (tem), tem,
26019			       TARGET_THUMB1_ONLY ? -3 : -7);
26020	  tem = gen_rtx_MINUS (GET_MODE (tem),
26021			       tem,
26022			       gen_rtx_SYMBOL_REF (Pmode,
26023						   ggc_strdup (labelpc)));
26024	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
26025	}
26026      else
26027	/* Output ".word .LTHUNKn".  */
26028	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26029
26030      if (TARGET_THUMB1_ONLY && mi_delta > 255)
26031	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26032    }
26033  else
26034    {
26035      fputs ("\tb\t", file);
26036      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26037      if (NEED_PLT_RELOC)
26038        fputs ("(PLT)", file);
26039      fputc ('\n', file);
26040    }
26041
26042  final_end_function ();
26043}
26044
26045int
26046arm_emit_vector_const (FILE *file, rtx x)
26047{
26048  int i;
26049  const char * pattern;
26050
26051  gcc_assert (GET_CODE (x) == CONST_VECTOR);
26052
26053  switch (GET_MODE (x))
26054    {
26055    case V2SImode: pattern = "%08x"; break;
26056    case V4HImode: pattern = "%04x"; break;
26057    case V8QImode: pattern = "%02x"; break;
26058    default:       gcc_unreachable ();
26059    }
26060
26061  fprintf (file, "0x");
26062  for (i = CONST_VECTOR_NUNITS (x); i--;)
26063    {
26064      rtx element;
26065
26066      element = CONST_VECTOR_ELT (x, i);
26067      fprintf (file, pattern, INTVAL (element));
26068    }
26069
26070  return 1;
26071}
26072
/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26074   HFmode constant pool entries are actually loaded with ldr.  */
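/* For example (illustrative), the HFmode constant 1.0 has the IEEE
   half-precision bit pattern 0x3c00; on a little-endian target the two
   value bytes are emitted first, followed by two bytes of zero padding.  */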
26075void
26076arm_emit_fp16_const (rtx c)
26077{
26078  REAL_VALUE_TYPE r;
26079  long bits;
26080
26081  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
26082  bits = real_to_target (NULL, &r, HFmode);
26083  if (WORDS_BIG_ENDIAN)
26084    assemble_zeros (2);
26085  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26086  if (!WORDS_BIG_ENDIAN)
26087    assemble_zeros (2);
26088}
26089
26090const char *
26091arm_output_load_gr (rtx *operands)
26092{
26093  rtx reg;
26094  rtx offset;
26095  rtx wcgr;
26096  rtx sum;
26097
26098  if (!MEM_P (operands [1])
26099      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26100      || !REG_P (reg = XEXP (sum, 0))
26101      || !CONST_INT_P (offset = XEXP (sum, 1))
26102      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26103    return "wldrw%?\t%0, %1";
26104
26105  /* Fix up an out-of-range load of a GR register.  */
26106  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26107  wcgr = operands[0];
26108  operands[0] = reg;
26109  output_asm_insn ("ldr%?\t%0, %1", operands);
26110
26111  operands[0] = wcgr;
26112  operands[1] = reg;
26113  output_asm_insn ("tmcr%?\t%0, %1", operands);
26114  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26115
26116  return "";
26117}
26118
26119/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26120
26121   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26122   named arg and all anonymous args onto the stack.
26123   XXX I know the prologue shouldn't be pushing registers, but it is faster
26124   that way.  */
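/* For example (illustrative), for a variadic function whose named arguments
   occupy only r0, nregs below is 1 and *pretend_size becomes
   3 * UNITS_PER_WORD, so the prologue pushes r1-r3 and all anonymous
   arguments end up contiguous on the stack.  */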
26125
26126static void
26127arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26128			    machine_mode mode,
26129			    tree type,
26130			    int *pretend_size,
26131			    int second_time ATTRIBUTE_UNUSED)
26132{
26133  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26134  int nregs;
26135
26136  cfun->machine->uses_anonymous_args = 1;
26137  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26138    {
26139      nregs = pcum->aapcs_ncrn;
26140      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26141	nregs++;
26142    }
26143  else
26144    nregs = pcum->nregs;
26145
26146  if (nregs < NUM_ARG_REGS)
26147    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26148}
26149
26150/* We can't rely on the caller doing the proper promotion when
26151   using APCS or ATPCS.  */
26152
26153static bool
26154arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26155{
26156    return !TARGET_AAPCS_BASED;
26157}
26158
26159static machine_mode
26160arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26161                           machine_mode mode,
26162                           int *punsignedp ATTRIBUTE_UNUSED,
26163                           const_tree fntype ATTRIBUTE_UNUSED,
26164                           int for_return ATTRIBUTE_UNUSED)
26165{
26166  if (GET_MODE_CLASS (mode) == MODE_INT
26167      && GET_MODE_SIZE (mode) < 4)
26168    return SImode;
26169
26170  return mode;
26171}
26172
26173/* AAPCS based ABIs use short enums by default.  */
26174
26175static bool
26176arm_default_short_enums (void)
26177{
26178  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26179}
26180
26181
26182/* AAPCS requires that anonymous bitfields affect structure alignment.  */
26183
26184static bool
26185arm_align_anon_bitfield (void)
26186{
26187  return TARGET_AAPCS_BASED;
26188}
26189
26190
26191/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
26192
26193static tree
26194arm_cxx_guard_type (void)
26195{
26196  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26197}
26198
26199
26200/* The EABI says test the least significant bit of a guard variable.  */
26201
26202static bool
26203arm_cxx_guard_mask_bit (void)
26204{
26205  return TARGET_AAPCS_BASED;
26206}
26207
26208
26209/* The EABI specifies that all array cookies are 8 bytes long.  */
26210
26211static tree
26212arm_get_cookie_size (tree type)
26213{
26214  tree size;
26215
26216  if (!TARGET_AAPCS_BASED)
26217    return default_cxx_get_cookie_size (type);
26218
26219  size = build_int_cst (sizetype, 8);
26220  return size;
26221}
26222
26223
26224/* The EABI says that array cookies should also contain the element size.  */
26225
26226static bool
26227arm_cookie_has_size (void)
26228{
26229  return TARGET_AAPCS_BASED;
26230}
26231
26232
26233/* The EABI says constructors and destructors should return a pointer to
26234   the object constructed/destroyed.  */
26235
26236static bool
26237arm_cxx_cdtor_returns_this (void)
26238{
26239  return TARGET_AAPCS_BASED;
26240}
26241
26242/* The EABI says that an inline function may never be the key
26243   method.  */
26244
26245static bool
26246arm_cxx_key_method_may_be_inline (void)
26247{
26248  return !TARGET_AAPCS_BASED;
26249}
26250
26251static void
26252arm_cxx_determine_class_data_visibility (tree decl)
26253{
26254  if (!TARGET_AAPCS_BASED
26255      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26256    return;
26257
26258  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26259     is exported.  However, on systems without dynamic vague linkage,
26260     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
26261  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26262    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26263  else
26264    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26265  DECL_VISIBILITY_SPECIFIED (decl) = 1;
26266}
26267
26268static bool
26269arm_cxx_class_data_always_comdat (void)
26270{
26271  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26272     vague linkage if the class has no key function.  */
26273  return !TARGET_AAPCS_BASED;
26274}
26275
26276
26277/* The EABI says __aeabi_atexit should be used to register static
26278   destructors.  */
26279
26280static bool
26281arm_cxx_use_aeabi_atexit (void)
26282{
26283  return TARGET_AAPCS_BASED;
26284}
26285
26286
26287void
26288arm_set_return_address (rtx source, rtx scratch)
26289{
26290  arm_stack_offsets *offsets;
26291  HOST_WIDE_INT delta;
26292  rtx addr;
26293  unsigned long saved_regs;
26294
26295  offsets = arm_get_frame_offsets ();
26296  saved_regs = offsets->saved_regs_mask;
26297
26298  if ((saved_regs & (1 << LR_REGNUM)) == 0)
26299    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26300  else
26301    {
26302      if (frame_pointer_needed)
26303	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26304      else
26305	{
26306	  /* LR will be the first saved register.  */
26307	  delta = offsets->outgoing_args - (offsets->frame + 4);
26308
26309
26310	  if (delta >= 4096)
26311	    {
26312	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26313				     GEN_INT (delta & ~4095)));
26314	      addr = scratch;
26315	      delta &= 4095;
26316	    }
26317	  else
26318	    addr = stack_pointer_rtx;
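	  /* For example, a delta of 4100 would be split above into
	     "add scratch, sp, #4096" plus a residual offset of 4, keeping
	     the final store offset within its immediate range.  */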
26319
26320	  addr = plus_constant (Pmode, addr, delta);
26321	}
26322      /* The store needs to be marked as frame related in order to prevent
26323	 DSE from deleting it as dead if it is based on fp.  */
26324      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26325      RTX_FRAME_RELATED_P (insn) = 1;
26326      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26327    }
26328}
26329
26330
26331void
26332thumb_set_return_address (rtx source, rtx scratch)
26333{
26334  arm_stack_offsets *offsets;
26335  HOST_WIDE_INT delta;
26336  HOST_WIDE_INT limit;
26337  int reg;
26338  rtx addr;
26339  unsigned long mask;
26340
26341  emit_use (source);
26342
26343  offsets = arm_get_frame_offsets ();
26344  mask = offsets->saved_regs_mask;
26345  if (mask & (1 << LR_REGNUM))
26346    {
26347      limit = 1024;
26348      /* Find the saved regs.  */
26349      if (frame_pointer_needed)
26350	{
26351	  delta = offsets->soft_frame - offsets->saved_args;
26352	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26353	  if (TARGET_THUMB1)
26354	    limit = 128;
26355	}
26356      else
26357	{
26358	  delta = offsets->outgoing_args - offsets->saved_args;
26359	  reg = SP_REGNUM;
26360	}
26361      /* Allow for the stack frame.  */
26362      if (TARGET_THUMB1 && TARGET_BACKTRACE)
26363	delta -= 16;
26364      /* The link register is always the first saved register.  */
26365      delta -= 4;
26366
26367      /* Construct the address.  */
26368      addr = gen_rtx_REG (SImode, reg);
26369      if (delta > limit)
26370	{
26371	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26372	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26373	  addr = scratch;
26374	}
26375      else
26376	addr = plus_constant (Pmode, addr, delta);
26377
26378      /* The store needs to be marked as frame related in order to prevent
26379	 DSE from deleting it as dead if it is based on fp.  */
26380      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26381      RTX_FRAME_RELATED_P (insn) = 1;
26382      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26383    }
26384  else
26385    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26386}
26387
26388/* Implements target hook vector_mode_supported_p.  */
26389bool
26390arm_vector_mode_supported_p (machine_mode mode)
26391{
26392  /* Neon also supports V2SImode, etc. listed in the clause below.  */
26393  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26394      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26395    return true;
26396
26397  if ((TARGET_NEON || TARGET_IWMMXT)
26398      && ((mode == V2SImode)
26399	  || (mode == V4HImode)
26400	  || (mode == V8QImode)))
26401    return true;
26402
26403  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26404      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26405      || mode == V2HAmode))
26406    return true;
26407
26408  return false;
26409}
26410
26411/* Implements target hook array_mode_supported_p.  */
26412
26413static bool
26414arm_array_mode_supported_p (machine_mode mode,
26415			    unsigned HOST_WIDE_INT nelems)
26416{
26417  if (TARGET_NEON
26418      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26419      && (nelems >= 2 && nelems <= 4))
26420    return true;
26421
26422  return false;
26423}
26424
/* Use the option -mvectorize-with-neon-double to override the use of quadword
26426   registers when autovectorizing for Neon, at least until multiple vector
26427   widths are supported properly by the middle-end.  */
26428
26429static machine_mode
26430arm_preferred_simd_mode (machine_mode mode)
26431{
26432  if (TARGET_NEON)
26433    switch (mode)
26434      {
26435      case SFmode:
26436	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26437      case SImode:
26438	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26439      case HImode:
26440	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26441      case QImode:
26442	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26443      case DImode:
26444	if (!TARGET_NEON_VECTORIZE_DOUBLE)
26445	  return V2DImode;
26446	break;
26447
26448      default:;
26449      }
26450
26451  if (TARGET_REALLY_IWMMXT)
26452    switch (mode)
26453      {
26454      case SImode:
26455	return V2SImode;
26456      case HImode:
26457	return V4HImode;
26458      case QImode:
26459	return V8QImode;
26460
26461      default:;
26462      }
26463
26464  return word_mode;
26465}
26466
26467/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26468
26469   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
26470   using r0-r4 for function arguments, r7 for the stack frame and don't have
26471   enough left over to do doubleword arithmetic.  For Thumb-2 all the
26472   potentially problematic instructions accept high registers so this is not
26473   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
26474   that require many low registers.  */
26475static bool
26476arm_class_likely_spilled_p (reg_class_t rclass)
26477{
26478  if ((TARGET_THUMB1 && rclass == LO_REGS)
26479      || rclass  == CC_REG)
26480    return true;
26481
26482  return false;
26483}
26484
26485/* Implements target hook small_register_classes_for_mode_p.  */
26486bool
26487arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26488{
26489  return TARGET_THUMB1;
26490}
26491
26492/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
26493   ARM insns and therefore guarantee that the shift count is modulo 256.
26494   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26495   guarantee no particular behavior for out-of-range counts.  */
26496
26497static unsigned HOST_WIDE_INT
26498arm_shift_truncation_mask (machine_mode mode)
26499{
26500  return mode == SImode ? 255 : 0;
26501}
26502
26503
26504/* Map internal gcc register numbers to DWARF2 register numbers.  */
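/* For example (using the ranges below), the core registers map to
   themselves, s5 maps to 64 + 5 = 69, and d16 maps to 256 + 16 = 272.  */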
26505
26506unsigned int
26507arm_dbx_register_number (unsigned int regno)
26508{
26509  if (regno < 16)
26510    return regno;
26511
26512  if (IS_VFP_REGNUM (regno))
26513    {
26514      /* See comment in arm_dwarf_register_span.  */
26515      if (VFP_REGNO_OK_FOR_SINGLE (regno))
26516	return 64 + regno - FIRST_VFP_REGNUM;
26517      else
26518	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26519    }
26520
26521  if (IS_IWMMXT_GR_REGNUM (regno))
26522    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26523
26524  if (IS_IWMMXT_REGNUM (regno))
26525    return 112 + regno - FIRST_IWMMXT_REGNUM;
26526
26527  gcc_unreachable ();
26528}
26529
/* DWARF models the VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
26533static rtx
26534arm_dwarf_register_span (rtx rtl)
26535{
26536  machine_mode mode;
26537  unsigned regno;
26538  rtx parts[16];
26539  int nregs;
26540  int i;
26541
26542  regno = REGNO (rtl);
26543  if (!IS_VFP_REGNUM (regno))
26544    return NULL_RTX;
26545
26546  /* XXX FIXME: The EABI defines two VFP register ranges:
26547	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26548	256-287: D0-D31
26549     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26550     corresponding D register.  Until GDB supports this, we shall use the
26551     legacy encodings.  We also use these encodings for D0-D15 for
26552     compatibility with older debuggers.  */
26553  mode = GET_MODE (rtl);
26554  if (GET_MODE_SIZE (mode) < 8)
26555    return NULL_RTX;
26556
26557  if (VFP_REGNO_OK_FOR_SINGLE (regno))
26558    {
26559      nregs = GET_MODE_SIZE (mode) / 4;
26560      for (i = 0; i < nregs; i += 2)
26561	if (TARGET_BIG_END)
26562	  {
26563	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26564	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26565	  }
26566	else
26567	  {
26568	    parts[i] = gen_rtx_REG (SImode, regno + i);
26569	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26570	  }
26571    }
26572  else
26573    {
26574      nregs = GET_MODE_SIZE (mode) / 8;
26575      for (i = 0; i < nregs; i++)
26576	parts[i] = gen_rtx_REG (DImode, regno + i);
26577    }
26578
26579  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26580}
26581
26582#if ARM_UNWIND_INFO
26583/* Emit unwind directives for a store-multiple instruction or stack pointer
26584   push during alignment.
26585   These should only ever be generated by the function prologue code, so
26586   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked in the unwind information; for -Os it
   sometimes pushes dummy registers before the first register that needs to
   be tracked in the unwind information.  Such dummy registers are only
   there to avoid a separate stack adjustment and will not be restored in
   the epilogue.  */
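/* For illustration, a prologue "push {r4, r5, lr}" is described below by
   ".save {r4, r5, lr}" and a "vpush {d8, d9}" by ".vsave {d8, d9}"; any
   padding words become ".pad" directives rather than being listed as
   saved registers.  */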
26593
26594static void
26595arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26596{
26597  int i;
26598  HOST_WIDE_INT offset;
26599  HOST_WIDE_INT nregs;
26600  int reg_size;
26601  unsigned reg;
26602  unsigned lastreg;
26603  unsigned padfirst = 0, padlast = 0;
26604  rtx e;
26605
26606  e = XVECEXP (p, 0, 0);
26607  gcc_assert (GET_CODE (e) == SET);
26608
26609  /* First insn will adjust the stack pointer.  */
26610  gcc_assert (GET_CODE (e) == SET
26611	      && REG_P (SET_DEST (e))
26612	      && REGNO (SET_DEST (e)) == SP_REGNUM
26613	      && GET_CODE (SET_SRC (e)) == PLUS);
26614
26615  offset = -INTVAL (XEXP (SET_SRC (e), 1));
26616  nregs = XVECLEN (p, 0) - 1;
26617  gcc_assert (nregs);
26618
26619  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26620  if (reg < 16)
26621    {
26622      /* For -Os dummy registers can be pushed at the beginning to
26623	 avoid separate stack pointer adjustment.  */
26624      e = XVECEXP (p, 0, 1);
26625      e = XEXP (SET_DEST (e), 0);
26626      if (GET_CODE (e) == PLUS)
26627	padfirst = INTVAL (XEXP (e, 1));
26628      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but does not annotate it,
	 as it is never restored.  We turn this into a stack pointer
	 adjustment.  */
26631      e = XVECEXP (p, 0, nregs);
26632      e = XEXP (SET_DEST (e), 0);
26633      if (GET_CODE (e) == PLUS)
26634	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26635      else
26636	padlast = offset - 4;
26637      gcc_assert (padlast == 0 || padlast == 4);
26638      if (padlast == 4)
26639	fprintf (asm_out_file, "\t.pad #4\n");
26640      reg_size = 4;
26641      fprintf (asm_out_file, "\t.save {");
26642    }
26643  else if (IS_VFP_REGNUM (reg))
26644    {
26645      reg_size = 8;
26646      fprintf (asm_out_file, "\t.vsave {");
26647    }
26648  else
26649    /* Unknown register type.  */
26650    gcc_unreachable ();
26651
26652  /* If the stack increment doesn't match the size of the saved registers,
26653     something has gone horribly wrong.  */
26654  gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26655
26656  offset = padfirst;
26657  lastreg = 0;
26658  /* The remaining insns will describe the stores.  */
26659  for (i = 1; i <= nregs; i++)
26660    {
26661      /* Expect (set (mem <addr>) (reg)).
26662         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
26663      e = XVECEXP (p, 0, i);
26664      gcc_assert (GET_CODE (e) == SET
26665		  && MEM_P (SET_DEST (e))
26666		  && REG_P (SET_SRC (e)));
26667
26668      reg = REGNO (SET_SRC (e));
26669      gcc_assert (reg >= lastreg);
26670
26671      if (i != 1)
26672	fprintf (asm_out_file, ", ");
26673      /* We can't use %r for vfp because we need to use the
26674	 double precision register names.  */
26675      if (IS_VFP_REGNUM (reg))
26676	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26677      else
26678	asm_fprintf (asm_out_file, "%r", reg);
26679
26680#ifdef ENABLE_CHECKING
26681      /* Check that the addresses are consecutive.  */
26682      e = XEXP (SET_DEST (e), 0);
26683      if (GET_CODE (e) == PLUS)
26684	gcc_assert (REG_P (XEXP (e, 0))
26685		    && REGNO (XEXP (e, 0)) == SP_REGNUM
26686		    && CONST_INT_P (XEXP (e, 1))
26687		    && offset == INTVAL (XEXP (e, 1)));
26688      else
26689	gcc_assert (i == 1
26690		    && REG_P (e)
26691		    && REGNO (e) == SP_REGNUM);
26692      offset += reg_size;
26693#endif
26694    }
26695  fprintf (asm_out_file, "}\n");
26696  if (padfirst)
26697    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26698}
26699
26700/*  Emit unwind directives for a SET.  */
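/* For illustration, a prologue "sp = sp - 16" is emitted below as
   ".pad #16", a single pre-decrement push of lr as ".save {lr}", and
   (in ARM mode) "fp = sp + 8" as ".setfp fp, sp, #8".  */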
26701
26702static void
26703arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26704{
26705  rtx e0;
26706  rtx e1;
26707  unsigned reg;
26708
26709  e0 = XEXP (p, 0);
26710  e1 = XEXP (p, 1);
26711  switch (GET_CODE (e0))
26712    {
26713    case MEM:
26714      /* Pushing a single register.  */
26715      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26716	  || !REG_P (XEXP (XEXP (e0, 0), 0))
26717	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26718	abort ();
26719
26720      asm_fprintf (asm_out_file, "\t.save ");
26721      if (IS_VFP_REGNUM (REGNO (e1)))
26722	asm_fprintf(asm_out_file, "{d%d}\n",
26723		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26724      else
26725	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26726      break;
26727
26728    case REG:
26729      if (REGNO (e0) == SP_REGNUM)
26730	{
26731	  /* A stack increment.  */
26732	  if (GET_CODE (e1) != PLUS
26733	      || !REG_P (XEXP (e1, 0))
26734	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
26735	      || !CONST_INT_P (XEXP (e1, 1)))
26736	    abort ();
26737
26738	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26739		       -INTVAL (XEXP (e1, 1)));
26740	}
26741      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26742	{
26743	  HOST_WIDE_INT offset;
26744
26745	  if (GET_CODE (e1) == PLUS)
26746	    {
26747	      if (!REG_P (XEXP (e1, 0))
26748		  || !CONST_INT_P (XEXP (e1, 1)))
26749		abort ();
26750	      reg = REGNO (XEXP (e1, 0));
26751	      offset = INTVAL (XEXP (e1, 1));
26752	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26753			   HARD_FRAME_POINTER_REGNUM, reg,
26754			   offset);
26755	    }
26756	  else if (REG_P (e1))
26757	    {
26758	      reg = REGNO (e1);
26759	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26760			   HARD_FRAME_POINTER_REGNUM, reg);
26761	    }
26762	  else
26763	    abort ();
26764	}
26765      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26766	{
26767	  /* Move from sp to reg.  */
26768	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26769	}
26770     else if (GET_CODE (e1) == PLUS
26771	      && REG_P (XEXP (e1, 0))
26772	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
26773	      && CONST_INT_P (XEXP (e1, 1)))
26774	{
26775	  /* Set reg to offset from sp.  */
26776	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26777		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26778	}
26779      else
26780	abort ();
26781      break;
26782
26783    default:
26784      abort ();
26785    }
26786}
26787
26788
26789/* Emit unwind directives for the given insn.  */
26790
26791static void
26792arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26793{
26794  rtx note, pat;
26795  bool handled_one = false;
26796
26797  if (arm_except_unwind_info (&global_options) != UI_TARGET)
26798    return;
26799
26800  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26801      && (TREE_NOTHROW (current_function_decl)
26802	  || crtl->all_throwers_are_sibcalls))
26803    return;
26804
26805  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26806    return;
26807
26808  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26809    {
26810      switch (REG_NOTE_KIND (note))
26811	{
26812	case REG_FRAME_RELATED_EXPR:
26813	  pat = XEXP (note, 0);
26814	  goto found;
26815
26816	case REG_CFA_REGISTER:
26817	  pat = XEXP (note, 0);
26818	  if (pat == NULL)
26819	    {
26820	      pat = PATTERN (insn);
26821	      if (GET_CODE (pat) == PARALLEL)
26822		pat = XVECEXP (pat, 0, 0);
26823	    }
26824
26825	  /* Only emitted for IS_STACKALIGN re-alignment.  */
26826	  {
26827	    rtx dest, src;
26828	    unsigned reg;
26829
26830	    src = SET_SRC (pat);
26831	    dest = SET_DEST (pat);
26832
26833	    gcc_assert (src == stack_pointer_rtx);
26834	    reg = REGNO (dest);
26835	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26836			 reg + 0x90, reg);
26837	  }
26838	  handled_one = true;
26839	  break;
26840
	/* The INSN is generated in the epilogue.  It is set as
	   RTX_FRAME_RELATED_P to get correct dwarf information for
	   shrink-wrapping.  We should not emit unwind information for it,
	   because such notes are used only for pretend arguments or to
	   adjust sp and restore registers from the stack.  */
26846	case REG_CFA_DEF_CFA:
26847	case REG_CFA_ADJUST_CFA:
26848	case REG_CFA_RESTORE:
26849	  return;
26850
26851	case REG_CFA_EXPRESSION:
26852	case REG_CFA_OFFSET:
26853	  /* ??? Only handling here what we actually emit.  */
26854	  gcc_unreachable ();
26855
26856	default:
26857	  break;
26858	}
26859    }
26860  if (handled_one)
26861    return;
26862  pat = PATTERN (insn);
26863 found:
26864
26865  switch (GET_CODE (pat))
26866    {
26867    case SET:
26868      arm_unwind_emit_set (asm_out_file, pat);
26869      break;
26870
26871    case SEQUENCE:
26872      /* Store multiple.  */
26873      arm_unwind_emit_sequence (asm_out_file, pat);
26874      break;
26875
26876    default:
26877      abort();
26878    }
26879}
26880
26881
26882/* Output a reference from a function exception table to the type_info
26883   object X.  The EABI specifies that the symbol should be relocated by
26884   an R_ARM_TARGET2 relocation.  */
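/* E.g. (illustrative) a symbolic X is emitted as "\t.word\tX(TARGET2)";
   integer constants are emitted without the relocation suffix.  */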
26885
26886static bool
26887arm_output_ttype (rtx x)
26888{
26889  fputs ("\t.word\t", asm_out_file);
26890  output_addr_const (asm_out_file, x);
26891  /* Use special relocations for symbol references.  */
26892  if (!CONST_INT_P (x))
26893    fputs ("(TARGET2)", asm_out_file);
26894  fputc ('\n', asm_out_file);
26895
26896  return TRUE;
26897}
26898
26899/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
26900
26901static void
26902arm_asm_emit_except_personality (rtx personality)
26903{
26904  fputs ("\t.personality\t", asm_out_file);
26905  output_addr_const (asm_out_file, personality);
26906  fputc ('\n', asm_out_file);
26907}
26908
26909/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
26910
26911static void
26912arm_asm_init_sections (void)
26913{
26914  exception_section = get_unnamed_section (0, output_section_asm_op,
26915					   "\t.handlerdata");
26916}
26917#endif /* ARM_UNWIND_INFO */
26918
26919/* Output unwind directives for the start/end of a function.  */
26920
26921void
26922arm_output_fn_unwind (FILE * f, bool prologue)
26923{
26924  if (arm_except_unwind_info (&global_options) != UI_TARGET)
26925    return;
26926
26927  if (prologue)
26928    fputs ("\t.fnstart\n", f);
26929  else
26930    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
26934      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26935	  && (TREE_NOTHROW (current_function_decl)
26936	      || crtl->all_throwers_are_sibcalls))
26937	fputs("\t.cantunwind\n", f);
26938
26939      fputs ("\t.fnend\n", f);
26940    }
26941}
26942
26943static bool
26944arm_emit_tls_decoration (FILE *fp, rtx x)
26945{
26946  enum tls_reloc reloc;
26947  rtx val;
26948
26949  val = XVECEXP (x, 0, 0);
26950  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26951
26952  output_addr_const (fp, val);
26953
26954  switch (reloc)
26955    {
26956    case TLS_GD32:
26957      fputs ("(tlsgd)", fp);
26958      break;
26959    case TLS_LDM32:
26960      fputs ("(tlsldm)", fp);
26961      break;
26962    case TLS_LDO32:
26963      fputs ("(tlsldo)", fp);
26964      break;
26965    case TLS_IE32:
26966      fputs ("(gottpoff)", fp);
26967      break;
26968    case TLS_LE32:
26969      fputs ("(tpoff)", fp);
26970      break;
26971    case TLS_DESCSEQ:
26972      fputs ("(tlsdesc)", fp);
26973      break;
26974    default:
26975      gcc_unreachable ();
26976    }
26977
26978  switch (reloc)
26979    {
26980    case TLS_GD32:
26981    case TLS_LDM32:
26982    case TLS_IE32:
26983    case TLS_DESCSEQ:
26984      fputs (" + (. - ", fp);
26985      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added
	 rather than subtracted.  */
26987      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26988      output_addr_const (fp, XVECEXP (x, 0, 3));
26989      fputc (')', fp);
26990      break;
26991    default:
26992      break;
26993    }
26994
26995  return TRUE;
26996}
26997
26998/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
26999
27000static void
27001arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27002{
27003  gcc_assert (size == 4);
27004  fputs ("\t.word\t", file);
27005  output_addr_const (file, x);
27006  fputs ("(tlsldo)", file);
27007}
27008
27009/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
27010
27011static bool
27012arm_output_addr_const_extra (FILE *fp, rtx x)
27013{
27014  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27015    return arm_emit_tls_decoration (fp, x);
27016  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27017    {
27018      char label[256];
27019      int labelno = INTVAL (XVECEXP (x, 0, 0));
27020
27021      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27022      assemble_name_raw (fp, label);
27023
27024      return TRUE;
27025    }
27026  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27027    {
27028      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27029      if (GOT_PCREL)
27030	fputs ("+.", fp);
27031      fputs ("-(", fp);
27032      output_addr_const (fp, XVECEXP (x, 0, 0));
27033      fputc (')', fp);
27034      return TRUE;
27035    }
27036  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27037    {
27038      output_addr_const (fp, XVECEXP (x, 0, 0));
27039      if (GOT_PCREL)
27040        fputs ("+.", fp);
27041      fputs ("-(", fp);
27042      output_addr_const (fp, XVECEXP (x, 0, 1));
27043      fputc (')', fp);
27044      return TRUE;
27045    }
27046  else if (GET_CODE (x) == CONST_VECTOR)
27047    return arm_emit_vector_const (fp, x);
27048
27049  return FALSE;
27050}
27051
27052/* Output assembly for a shift instruction.
27053   SET_FLAGS determines how the instruction modifies the condition codes.
27054   0 - Do not set condition codes.
27055   1 - Set condition codes.
27056   2 - Use smallest instruction.  */
27057const char *
27058arm_output_shift(rtx * operands, int set_flags)
27059{
27060  char pattern[100];
27061  static const char flag_chars[3] = {'?', '.', '!'};
27062  const char *shift;
27063  HOST_WIDE_INT val;
27064  char c;
27065
27066  c = flag_chars[set_flags];
27067  if (TARGET_UNIFIED_ASM)
27068    {
27069      shift = shift_op(operands[3], &val);
27070      if (shift)
27071	{
27072	  if (val != -1)
27073	    operands[2] = GEN_INT(val);
27074	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27075	}
27076      else
27077	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27078    }
27079  else
27080    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
27081  output_asm_insn (pattern, operands);
27082  return "";
27083}
27084
27085/* Output assembly for a WMMX immediate shift instruction.  */
27086const char *
27087arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27088{
27089  int shift = INTVAL (operands[2]);
27090  char templ[50];
27091  machine_mode opmode = GET_MODE (operands[0]);
27092
27093  gcc_assert (shift >= 0);
27094
  /* If the shift value in the register versions is greater than 63 (for
     the D qualifier), 31 (for the W qualifier) or 15 (for the H qualifier),
     the amount is out of range for the element size, so handle that case
     specially below.  */
27097  if (((opmode == V4HImode) && (shift > 15))
27098	|| ((opmode == V2SImode) && (shift > 31))
27099	|| ((opmode == DImode) && (shift > 63)))
27100  {
27101    if (wror_or_wsra)
27102      {
27103        sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27104        output_asm_insn (templ, operands);
27105        if (opmode == DImode)
27106          {
27107	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27108	    output_asm_insn (templ, operands);
27109          }
27110      }
27111    else
27112      {
27113        /* The destination register will contain all zeros.  */
27114        sprintf (templ, "wzero\t%%0");
27115        output_asm_insn (templ, operands);
27116      }
27117    return "";
27118  }
27119
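  /* For DImode shifts in the range 33..63 the operation is split into two
     instructions; e.g. (illustrative) a shift by #40 is emitted as a shift
     by #32 followed by a shift by #8.  */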
27120  if ((opmode == DImode) && (shift > 32))
27121    {
27122      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27123      output_asm_insn (templ, operands);
27124      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27125      output_asm_insn (templ, operands);
27126    }
27127  else
27128    {
27129      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27130      output_asm_insn (templ, operands);
27131    }
27132  return "";
27133}
27134
27135/* Output assembly for a WMMX tinsr instruction.  */
27136const char *
27137arm_output_iwmmxt_tinsr (rtx *operands)
27138{
27139  int mask = INTVAL (operands[3]);
27140  int i;
27141  char templ[50];
27142  int units = mode_nunits[GET_MODE (operands[0])];
27143  gcc_assert ((mask & (mask - 1)) == 0);
27144  for (i = 0; i < units; ++i)
27145    {
27146      if ((mask & 0x01) == 1)
27147        {
27148          break;
27149        }
27150      mask >>= 1;
27151    }
27152  gcc_assert (i < units);
27153  {
27154    switch (GET_MODE (operands[0]))
27155      {
27156      case V8QImode:
27157	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27158	break;
27159      case V4HImode:
27160	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27161	break;
27162      case V2SImode:
27163	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27164	break;
27165      default:
27166	gcc_unreachable ();
27167	break;
27168      }
27169    output_asm_insn (templ, operands);
27170  }
27171  return "";
27172}
27173
27174/* Output a Thumb-1 casesi dispatch sequence.  */
27175const char *
27176thumb1_output_casesi (rtx *operands)
27177{
27178  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27179
27180  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27181
27182  switch (GET_MODE(diff_vec))
27183    {
27184    case QImode:
27185      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27186	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27187    case HImode:
27188      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27189	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27190    case SImode:
27191      return "bl\t%___gnu_thumb1_case_si";
27192    default:
27193      gcc_unreachable ();
27194    }
27195}
27196
27197/* Output a Thumb-2 casesi instruction.  */
27198const char *
27199thumb2_output_casesi (rtx *operands)
27200{
27201  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27202
27203  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27204
27205  output_asm_insn ("cmp\t%0, %1", operands);
27206  output_asm_insn ("bhi\t%l3", operands);
27207  switch (GET_MODE(diff_vec))
27208    {
27209    case QImode:
27210      return "tbb\t[%|pc, %0]";
27211    case HImode:
27212      return "tbh\t[%|pc, %0, lsl #1]";
27213    case SImode:
27214      if (flag_pic)
27215	{
27216	  output_asm_insn ("adr\t%4, %l2", operands);
27217	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27218	  output_asm_insn ("add\t%4, %4, %5", operands);
27219	  return "bx\t%4";
27220	}
27221      else
27222	{
27223	  output_asm_insn ("adr\t%4, %l2", operands);
27224	  return "ldr\t%|pc, [%4, %0, lsl #2]";
27225	}
27226    default:
27227      gcc_unreachable ();
27228    }
27229}
27230
27231/* Most ARM cores are single issue, but some newer ones can dual issue.
27232   The scheduler descriptions rely on this being correct.  */
27233static int
27234arm_issue_rate (void)
27235{
27236  switch (arm_tune)
27237    {
27238    case xgene1:
27239      return 4;
27240
27241    case cortexa15:
27242    case cortexa57:
27243    case exynosm1:
27244      return 3;
27245
27246    case cortexm7:
27247    case cortexr4:
27248    case cortexr4f:
27249    case cortexr5:
27250    case genericv7a:
27251    case cortexa5:
27252    case cortexa7:
27253    case cortexa8:
27254    case cortexa9:
27255    case cortexa12:
27256    case cortexa17:
27257    case cortexa53:
27258    case fa726te:
27259    case marvell_pj4:
27260      return 2;
27261
27262    default:
27263      return 1;
27264    }
27265}
27266
/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
27269static int
27270arm_first_cycle_multipass_dfa_lookahead (void)
27271{
27272  int issue_rate = arm_issue_rate ();
27273
27274  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27275}
27276
27277/* Enable modeling of L2 auto-prefetcher.  */
27278static int
27279arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27280{
27281  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27282}
27283
27284const char *
27285arm_mangle_type (const_tree type)
27286{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
27289  if (TARGET_AAPCS_BASED
27290      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27291    return "St9__va_list";
27292
27293  /* Half-precision float.  */
27294  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27295    return "Dh";
27296
27297  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27298     builtin type.  */
27299  if (TYPE_NAME (type) != NULL)
27300    return arm_mangle_builtin_type (type);
27301
27302  /* Use the default mangling.  */
27303  return NULL;
27304}
27305
27306/* Order of allocation of core registers for Thumb: this allocation is
27307   written over the corresponding initial entries of the array
27308   initialized with REG_ALLOC_ORDER.  We allocate all low registers
27309   first.  Saving and restoring a low register is usually cheaper than
27310   using a call-clobbered high register.  */
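/* Note (an added observation, not from the original comment): r0-r3 are
   the AAPCS argument/scratch registers, so listing r3..r0 first also
   prefers registers that are already call-clobbered before touching the
   callee-saved low registers r4-r7.  */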
27311
27312static const int thumb_core_reg_alloc_order[] =
27313{
27314   3,  2,  1,  0,  4,  5,  6,  7,
27315  14, 12,  8,  9, 10, 11
27316};
27317
27318/* Adjust register allocation order when compiling for Thumb.  */
27319
27320void
27321arm_order_regs_for_local_alloc (void)
27322{
27323  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27324  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27325  if (TARGET_THUMB)
27326    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27327            sizeof (thumb_core_reg_alloc_order));
27328}
27329
27330/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
27331
27332bool
27333arm_frame_pointer_required (void)
27334{
27335  return (cfun->has_nonlocal_label
27336          || SUBTARGET_FRAME_POINTER_REQUIRED
27337          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27338}
27339
/* Only Thumb-1 lacks support for conditional execution, so return true if
   the target is not Thumb-1.  */
27342static bool
27343arm_have_conditional_execution (void)
27344{
27345  return !TARGET_THUMB1;
27346}
27347
27348/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
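/* E.g. (illustrative) a 128-bit NEON vector type is therefore given only
   64-bit alignment when TARGET_AAPCS_BASED.  */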
27349static HOST_WIDE_INT
27350arm_vector_alignment (const_tree type)
27351{
27352  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27353
27354  if (TARGET_AAPCS_BASED)
27355    align = MIN (align, 64);
27356
27357  return align;
27358}
27359
27360static unsigned int
27361arm_autovectorize_vector_sizes (void)
27362{
27363  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27364}
27365
27366static bool
27367arm_vector_alignment_reachable (const_tree type, bool is_packed)
27368{
27369  /* Vectors which aren't in packed structures will not be less aligned than
27370     the natural alignment of their element type, so this is safe.  */
27371  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27372    return !is_packed;
27373
27374  return default_builtin_vector_alignment_reachable (type, is_packed);
27375}
27376
27377static bool
27378arm_builtin_support_vector_misalignment (machine_mode mode,
27379					 const_tree type, int misalignment,
27380					 bool is_packed)
27381{
27382  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27383    {
27384      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27385
27386      if (is_packed)
27387        return align == 1;
27388
27389      /* If the misalignment is unknown, we should be able to handle the access
27390	 so long as it is not to a member of a packed data structure.  */
27391      if (misalignment == -1)
27392        return true;
27393
27394      /* Return true if the misalignment is a multiple of the natural alignment
27395         of the vector's element type.  This is probably always going to be
27396	 true in practice, since we've already established that this isn't a
27397	 packed access.  */
27398      return ((misalignment % align) == 0);
27399    }
27400
27401  return default_builtin_support_vector_misalignment (mode, type, misalignment,
27402						      is_packed);
27403}
27404
27405static void
27406arm_conditional_register_usage (void)
27407{
27408  int regno;
27409
27410  if (TARGET_THUMB1 && optimize_size)
27411    {
27412      /* When optimizing for size on Thumb-1, it's better not
27413        to use the HI regs, because of the overhead of
27414        stacking them.  */
27415      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27416	fixed_regs[regno] = call_used_regs[regno] = 1;
27417    }
27418
27419  /* The link register can be clobbered by any branch insn,
27420     but we have no way to track that at present, so mark
27421     it as unavailable.  */
27422  if (TARGET_THUMB1)
27423    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27424
27425  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27426    {
27427      /* VFPv3 registers are disabled when earlier VFP
27428	 versions are selected due to the definition of
27429	 LAST_VFP_REGNUM.  */
27430      for (regno = FIRST_VFP_REGNUM;
27431	   regno <= LAST_VFP_REGNUM; ++ regno)
27432	{
27433	  fixed_regs[regno] = 0;
27434	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27435	    || regno >= FIRST_VFP_REGNUM + 32;
27436	}
27437    }
27438
27439  if (TARGET_REALLY_IWMMXT)
27440    {
27441      regno = FIRST_IWMMXT_GR_REGNUM;
27442      /* The 2002/10/09 revision of the XScale ABI has wCG0
27443         and wCG1 as call-preserved registers.  The 2002/11/21
27444         revision changed this so that all wCG registers are
27445         scratch registers.  */
27446      for (regno = FIRST_IWMMXT_GR_REGNUM;
27447	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27448	fixed_regs[regno] = 0;
27449      /* The XScale ABI has wR0 - wR9 as scratch registers,
27450	 the rest as call-preserved registers.  */
27451      for (regno = FIRST_IWMMXT_REGNUM;
27452	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
27453	{
27454	  fixed_regs[regno] = 0;
27455	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27456	}
27457    }
27458
27459  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27460    {
27461      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27462      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27463    }
27464  else if (TARGET_APCS_STACK)
27465    {
27466      fixed_regs[10]     = 1;
27467      call_used_regs[10] = 1;
27468    }
27469  /* -mcaller-super-interworking reserves r11 for calls to
27470     _interwork_r11_call_via_rN().  Making the register global
27471     is an easy way of ensuring that it remains valid for all
27472     calls.  */
27473  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27474      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27475    {
27476      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27477      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27478      if (TARGET_CALLER_INTERWORKING)
27479	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27480    }
27481  SUBTARGET_CONDITIONAL_REGISTER_USAGE
27482}
27483
27484static reg_class_t
27485arm_preferred_rename_class (reg_class_t rclass)
27486{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass we therefore
     prefer LO_REGS, which can reduce code size.  */
27490  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27491    return LO_REGS;
27492  else
27493    return NO_REGS;
27494}
27495
/* Compute the attribute "length" of insn "*push_multi".
   This function MUST be kept in sync with that insn pattern.  */
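/* E.g. (illustrative) a Thumb-2 "push {r4, r5, lr}" uses the 16-bit
   encoding and has length 2, whereas a push that includes a high register
   such as r8 needs the 32-bit encoding and has length 4.  */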
27498int
27499arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27500{
27501  int i, regno, hi_reg;
27502  int num_saves = XVECLEN (parallel_op, 0);
27503
27504  /* ARM mode.  */
27505  if (TARGET_ARM)
27506    return 4;
27507  /* Thumb1 mode.  */
27508  if (TARGET_THUMB1)
27509    return 2;
27510
27511  /* Thumb2 mode.  */
27512  regno = REGNO (first_op);
27513  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27514  for (i = 1; i < num_saves && !hi_reg; i++)
27515    {
27516      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27517      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27518    }
27519
27520  if (!hi_reg)
27521    return 2;
27522  return 4;
27523}
27524
27525/* Compute the number of instructions emitted by output_move_double.  */
27526int
27527arm_count_output_move_double_insns (rtx *operands)
27528{
27529  int count;
27530  rtx ops[2];
27531  /* output_move_double may modify the operands array, so call it
27532     here on a copy of the array.  */
27533  ops[0] = operands[0];
27534  ops[1] = operands[1];
27535  output_move_double (ops, false, &count);
27536  return count;
27537}
27538
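/* Return the number of fraction bits implied by OPERAND when it is the
   exact reciprocal of a power of two, and 0 otherwise; e.g. (illustrative
   summary of the code below) a CONST_DOUBLE of 0.25 has exact inverse 4.0
   and yields 2.  */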
27539int
27540vfp3_const_double_for_fract_bits (rtx operand)
27541{
27542  REAL_VALUE_TYPE r0;
27543
27544  if (!CONST_DOUBLE_P (operand))
27545    return 0;
27546
27547  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27548  if (exact_real_inverse (DFmode, &r0)
27549      && !REAL_VALUE_NEGATIVE (r0))
27550    {
27551      if (exact_real_truncate (DFmode, &r0))
27552	{
27553	  HOST_WIDE_INT value = real_to_integer (&r0);
27554	  value = value & 0xffffffff;
27555	  if ((value != 0) && ( (value & (value - 1)) == 0))
27556	    return int_log2 (value);
27557	}
27558    }
27559  return 0;
27560}
27561
27562/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27563   log2 is in [1, 32], return that log2.  Otherwise return -1.
27564   This is used in the patterns for vcvt.s32.f32 floating-point to
27565   fixed-point conversions.  */
27566
27567int
27568vfp3_const_double_for_bits (rtx x)
27569{
27570  if (!CONST_DOUBLE_P (x))
27571    return -1;
27572
27573  REAL_VALUE_TYPE r;
27574
27575  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
27576  if (REAL_VALUE_NEGATIVE (r)
27577      || REAL_VALUE_ISNAN (r)
27578      || REAL_VALUE_ISINF (r)
27579      || !real_isinteger (&r, SFmode))
27580    return -1;
27581
27582  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (&r));
27583
27584  /* The exact_log2 above will have returned -1 if this is
27585     not an exact log2.  */
27586  if (!IN_RANGE (hwint, 1, 32))
27587    return -1;
27588
27589  return hwint;
27590}
27591
27592
27593/* Emit a memory barrier around an atomic sequence according to MODEL.  */
27594
27595static void
27596arm_pre_atomic_barrier (enum memmodel model)
27597{
27598  if (need_atomic_barrier_p (model, true))
27599    emit_insn (gen_memory_barrier ());
27600}
27601
27602static void
27603arm_post_atomic_barrier (enum memmodel model)
27604{
27605  if (need_atomic_barrier_p (model, false))
27606    emit_insn (gen_memory_barrier ());
27607}
27608
27609/* Emit the load-exclusive and store-exclusive instructions.
27610   Use acquire and release versions if necessary.  */
27611
27612static void
27613arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27614{
27615  rtx (*gen) (rtx, rtx);
27616
27617  if (acq)
27618    {
27619      switch (mode)
27620        {
27621        case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27622        case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27623        case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27624        case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27625        default:
27626          gcc_unreachable ();
27627        }
27628    }
27629  else
27630    {
27631      switch (mode)
27632        {
27633        case QImode: gen = gen_arm_load_exclusiveqi; break;
27634        case HImode: gen = gen_arm_load_exclusivehi; break;
27635        case SImode: gen = gen_arm_load_exclusivesi; break;
27636        case DImode: gen = gen_arm_load_exclusivedi; break;
27637        default:
27638          gcc_unreachable ();
27639        }
27640    }
27641
27642  emit_insn (gen (rval, mem));
27643}
27644
27645static void
27646arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27647                          rtx mem, bool rel)
27648{
27649  rtx (*gen) (rtx, rtx, rtx);
27650
27651  if (rel)
27652    {
27653      switch (mode)
27654        {
27655        case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27656        case HImode: gen = gen_arm_store_release_exclusivehi; break;
27657        case SImode: gen = gen_arm_store_release_exclusivesi; break;
27658        case DImode: gen = gen_arm_store_release_exclusivedi; break;
27659        default:
27660          gcc_unreachable ();
27661        }
27662    }
27663  else
27664    {
27665      switch (mode)
27666        {
27667        case QImode: gen = gen_arm_store_exclusiveqi; break;
27668        case HImode: gen = gen_arm_store_exclusivehi; break;
27669        case SImode: gen = gen_arm_store_exclusivesi; break;
27670        case DImode: gen = gen_arm_store_exclusivedi; break;
27671        default:
27672          gcc_unreachable ();
27673        }
27674    }
27675
27676  emit_insn (gen (bval, rval, mem));
27677}
27678
27679/* Mark the previous jump instruction as unlikely.  */
27680
27681static void
27682emit_unlikely_jump (rtx insn)
27683{
27684  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27685
27686  insn = emit_jump_insn (insn);
27687  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27688}
27689
27690/* Expand a compare and swap pattern.  */
27691
27692void
27693arm_expand_compare_and_swap (rtx operands[])
27694{
27695  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27696  machine_mode mode;
27697  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27698
27699  bval = operands[0];
27700  rval = operands[1];
27701  mem = operands[2];
27702  oldval = operands[3];
27703  newval = operands[4];
27704  is_weak = operands[5];
27705  mod_s = operands[6];
27706  mod_f = operands[7];
27707  mode = GET_MODE (mem);
27708
27709  /* Normally the succ memory model must be stronger than fail, but in the
27710     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27711     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
27712
27713  if (TARGET_HAVE_LDACQ
27714      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27715      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27716    mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27717
27718  switch (mode)
27719    {
27720    case QImode:
27721    case HImode:
27722      /* For narrow modes, we're going to perform the comparison in SImode,
27723	 so do the zero-extension now.  */
27724      rval = gen_reg_rtx (SImode);
27725      oldval = convert_modes (SImode, mode, oldval, true);
27726      /* FALLTHRU */
27727
27728    case SImode:
27729      /* Force the value into a register if needed.  We waited until after
27730	 the zero-extension above to do this properly.  */
27731      if (!arm_add_operand (oldval, SImode))
27732	oldval = force_reg (SImode, oldval);
27733      break;
27734
27735    case DImode:
27736      if (!cmpdi_operand (oldval, mode))
27737	oldval = force_reg (mode, oldval);
27738      break;
27739
27740    default:
27741      gcc_unreachable ();
27742    }
27743
27744  switch (mode)
27745    {
27746    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27747    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27748    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27749    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27750    default:
27751      gcc_unreachable ();
27752    }
27753
27754  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27755
27756  if (mode == QImode || mode == HImode)
27757    emit_move_insn (operands[1], gen_lowpart (mode, rval));
27758
27759  /* In all cases, we arrange for success to be signaled by Z set.
27760     This arrangement allows for the boolean result to be used directly
27761     in a subsequent branch, post optimization.  */
27762  x = gen_rtx_REG (CCmode, CC_REGNUM);
27763  x = gen_rtx_EQ (SImode, x, const0_rtx);
27764  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27765}
27766
27767/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
27768   another memory store between the load-exclusive and store-exclusive can
27769   reset the monitor from Exclusive to Open state.  This means we must wait
27770   until after reload to split the pattern, lest we get a register spill in
27771   the middle of the atomic sequence.  */
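/* Illustrative sketch of the sequence emitted for a strong compare and
   swap (acquire/release forms or explicit barriers are added according to
   the memory model):

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	scratch, newval, [mem]
		cmp	scratch, #0
		bne	1b
	2:
*/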
27772
27773void
27774arm_split_compare_and_swap (rtx operands[])
27775{
27776  rtx rval, mem, oldval, newval, scratch;
27777  machine_mode mode;
27778  enum memmodel mod_s, mod_f;
27779  bool is_weak;
27780  rtx_code_label *label1, *label2;
27781  rtx x, cond;
27782
27783  rval = operands[0];
27784  mem = operands[1];
27785  oldval = operands[2];
27786  newval = operands[3];
27787  is_weak = (operands[4] != const0_rtx);
27788  mod_s = memmodel_from_int (INTVAL (operands[5]));
27789  mod_f = memmodel_from_int (INTVAL (operands[6]));
27790  scratch = operands[7];
27791  mode = GET_MODE (mem);
27792
27793  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27794
27795  bool use_acquire = TARGET_HAVE_LDACQ
27796                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27797			  || is_mm_release (mod_s));
27798
27799  bool use_release = TARGET_HAVE_LDACQ
27800                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27801			  || is_mm_acquire (mod_s));
27802
27803  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
27804     a full barrier is emitted after the store-release.  */
27805  if (is_armv8_sync)
27806    use_acquire = false;
27807
27808  /* Checks whether a barrier is needed and emits one accordingly.  */
27809  if (!(use_acquire || use_release))
27810    arm_pre_atomic_barrier (mod_s);
27811
27812  label1 = NULL;
27813  if (!is_weak)
27814    {
27815      label1 = gen_label_rtx ();
27816      emit_label (label1);
27817    }
27818  label2 = gen_label_rtx ();
27819
27820  arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27821
27822  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27823  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27824  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27825			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27826  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27827
27828  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27829
27830  /* Weak or strong, we want EQ to be true for success, so that we
27831     match the flags that we got from the compare above.  */
27832  cond = gen_rtx_REG (CCmode, CC_REGNUM);
27833  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27834  emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27835
27836  if (!is_weak)
27837    {
27838      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27839      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27840				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27841      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27842    }
27843
27844  if (!is_mm_relaxed (mod_f))
27845    emit_label (label2);
27846
27847  /* Checks whether a barrier is needed and emits one accordingly.  */
27848  if (is_armv8_sync
27849      || !(use_acquire || use_release))
27850    arm_post_atomic_barrier (mod_s);
27851
27852  if (is_mm_relaxed (mod_f))
27853    emit_label (label2);
27854}
27855
27856void
27857arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27858		     rtx value, rtx model_rtx, rtx cond)
27859{
27860  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27861  machine_mode mode = GET_MODE (mem);
27862  machine_mode wmode = (mode == DImode ? DImode : SImode);
27863  rtx_code_label *label;
27864  rtx x;
27865
27866  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
27867
27868  bool use_acquire = TARGET_HAVE_LDACQ
27869                     && !(is_mm_relaxed (model) || is_mm_consume (model)
27870			  || is_mm_release (model));
27871
27872  bool use_release = TARGET_HAVE_LDACQ
27873                     && !(is_mm_relaxed (model) || is_mm_consume (model)
27874			  || is_mm_acquire (model));
27875
27876  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
27877     a full barrier is emitted after the store-release.  */
27878  if (is_armv8_sync)
27879    use_acquire = false;
27880
27881  /* Checks whether a barrier is needed and emits one accordingly.  */
27882  if (!(use_acquire || use_release))
27883    arm_pre_atomic_barrier (model);
27884
27885  label = gen_label_rtx ();
27886  emit_label (label);
27887
27888  if (new_out)
27889    new_out = gen_lowpart (wmode, new_out);
27890  if (old_out)
27891    old_out = gen_lowpart (wmode, old_out);
27892  else
27893    old_out = new_out;
27894  value = simplify_gen_subreg (wmode, value, mode, 0);
27895
27896  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27897
27898  switch (code)
27899    {
27900    case SET:
27901      new_out = value;
27902      break;
27903
27904    case NOT:
27905      x = gen_rtx_AND (wmode, old_out, value);
27906      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27907      x = gen_rtx_NOT (wmode, new_out);
27908      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27909      break;
27910
27911    case MINUS:
27912      if (CONST_INT_P (value))
27913	{
27914	  value = GEN_INT (-INTVAL (value));
27915	  code = PLUS;
27916	}
27917      /* FALLTHRU */
27918
27919    case PLUS:
27920      if (mode == DImode)
27921	{
27922	  /* DImode plus/minus need to clobber flags.  */
27923	  /* The adddi3 and subdi3 patterns are incorrectly written so that
27924	     they require matching operands, even when we could easily support
27925	     three operands.  Thankfully, this can be fixed up post-splitting,
27926	     as the individual add+adc patterns do accept three operands and
27927	     post-reload cprop can make these moves go away.  */
27928	  emit_move_insn (new_out, old_out);
27929	  if (code == PLUS)
27930	    x = gen_adddi3 (new_out, new_out, value);
27931	  else
27932	    x = gen_subdi3 (new_out, new_out, value);
27933	  emit_insn (x);
27934	  break;
27935	}
27936      /* FALLTHRU */
27937
27938    default:
27939      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27940      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27941      break;
27942    }
27943
27944  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27945                            use_release);
27946
27947  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27948  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27949
27950  /* Checks whether a barrier is needed and emits one accordingly.  */
27951  if (is_armv8_sync
27952      || !(use_acquire || use_release))
27953    arm_post_atomic_barrier (model);
27954}
27955
27956#define MAX_VECT_LEN 16
27957
27958struct expand_vec_perm_d
27959{
27960  rtx target, op0, op1;
27961  unsigned char perm[MAX_VECT_LEN];
27962  machine_mode vmode;
27963  unsigned char nelt;
27964  bool one_vector_p;
27965  bool testing_p;
27966};
27967
27968/* Generate a variable permutation.  */
27969
27970static void
27971arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27972{
27973  machine_mode vmode = GET_MODE (target);
27974  bool one_vector_p = rtx_equal_p (op0, op1);
27975
27976  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27977  gcc_checking_assert (GET_MODE (op0) == vmode);
27978  gcc_checking_assert (GET_MODE (op1) == vmode);
27979  gcc_checking_assert (GET_MODE (sel) == vmode);
27980  gcc_checking_assert (TARGET_NEON);
27981
27982  if (one_vector_p)
27983    {
27984      if (vmode == V8QImode)
27985	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27986      else
27987	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27988    }
27989  else
27990    {
27991      rtx pair;
27992
27993      if (vmode == V8QImode)
27994	{
27995	  pair = gen_reg_rtx (V16QImode);
27996	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27997	  pair = gen_lowpart (TImode, pair);
27998	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27999	}
28000      else
28001	{
28002	  pair = gen_reg_rtx (OImode);
28003	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28004	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28005	}
28006    }
28007}
28008
28009void
28010arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28011{
28012  machine_mode vmode = GET_MODE (target);
28013  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28014  bool one_vector_p = rtx_equal_p (op0, op1);
28015  rtx rmask[MAX_VECT_LEN], mask;
28016
28017  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
28018     numbering of elements for big-endian, we must reverse the order.  */
28019  gcc_checking_assert (!BYTES_BIG_ENDIAN);
28020
28021  /* The VTBL instruction does not use a modulo index, so we must take care
28022     of that ourselves.  */
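  /* E.g. (illustrative) with a single V8QImode operand nelt is 8, so each
     selector byte is ANDed with 7 before the VTBL.  */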
28023  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28024  for (i = 0; i < nelt; ++i)
28025    rmask[i] = mask;
28026  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28027  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28028
28029  arm_expand_vec_perm_1 (target, op0, op1, sel);
28030}
28031
28032/* Generate or test for an insn that supports a constant permutation.  */
28033
28034/* Recognize patterns for the VUZP insns.  */
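/* E.g. (illustrative) a V4SImode selector of {0, 2, 4, 6} or {1, 3, 5, 7}
   picks the even or odd elements of the concatenated operands and is
   matched here as a VUZP.  */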
28035
28036static bool
28037arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28038{
28039  unsigned int i, odd, mask, nelt = d->nelt;
28040  rtx out0, out1, in0, in1, x;
28041  rtx (*gen)(rtx, rtx, rtx, rtx);
28042
28043  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28044    return false;
28045
28046  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
28047  if (d->perm[0] == 0)
28048    odd = 0;
28049  else if (d->perm[0] == 1)
28050    odd = 1;
28051  else
28052    return false;
28053  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28054
28055  for (i = 0; i < nelt; i++)
28056    {
28057      unsigned elt = (i * 2 + odd) & mask;
28058      if (d->perm[i] != elt)
28059	return false;
28060    }
28061
28062  /* Success!  */
28063  if (d->testing_p)
28064    return true;
28065
28066  switch (d->vmode)
28067    {
28068    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28069    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
28070    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
28071    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
28072    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
28073    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
28074    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
28075    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
28076    default:
28077      gcc_unreachable ();
28078    }
28079
28080  in0 = d->op0;
28081  in1 = d->op1;
28082  if (BYTES_BIG_ENDIAN)
28083    {
28084      x = in0, in0 = in1, in1 = x;
28085      odd = !odd;
28086    }
28087
28088  out0 = d->target;
28089  out1 = gen_reg_rtx (d->vmode);
28090  if (odd)
28091    x = out0, out0 = out1, out1 = x;
28092
28093  emit_insn (gen (out0, in0, in1, out1));
28094  return true;
28095}
28096
28097/* Recognize patterns for the VZIP insns.  */
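/* E.g. (illustrative) a V4SImode selector of {0, 4, 1, 5} or {2, 6, 3, 7}
   interleaves the low or high halves of the two operands and is matched
   here as a VZIP.  */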
28098
28099static bool
28100arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28101{
28102  unsigned int i, high, mask, nelt = d->nelt;
28103  rtx out0, out1, in0, in1, x;
28104  rtx (*gen)(rtx, rtx, rtx, rtx);
28105
28106  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28107    return false;
28108
28109  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
28110  high = nelt / 2;
28111  if (d->perm[0] == high)
28112    ;
28113  else if (d->perm[0] == 0)
28114    high = 0;
28115  else
28116    return false;
28117  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28118
28119  for (i = 0; i < nelt / 2; i++)
28120    {
28121      unsigned elt = (i + high) & mask;
28122      if (d->perm[i * 2] != elt)
28123	return false;
28124      elt = (elt + nelt) & mask;
28125      if (d->perm[i * 2 + 1] != elt)
28126	return false;
28127    }
28128
28129  /* Success!  */
28130  if (d->testing_p)
28131    return true;
28132
28133  switch (d->vmode)
28134    {
28135    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28136    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
28137    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
28138    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
28139    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
28140    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
28141    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
28142    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
28143    default:
28144      gcc_unreachable ();
28145    }
28146
28147  in0 = d->op0;
28148  in1 = d->op1;
28149  if (BYTES_BIG_ENDIAN)
28150    {
28151      x = in0, in0 = in1, in1 = x;
28152      high = !high;
28153    }
28154
28155  out0 = d->target;
28156  out1 = gen_reg_rtx (d->vmode);
28157  if (high)
28158    x = out0, out0 = out1, out1 = x;
28159
28160  emit_insn (gen (out0, in0, in1, out1));
28161  return true;
28162}
28163
28164/* Recognize patterns for the VREV insns.  */
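/* E.g. (illustrative) the V8QImode selector {7, 6, 5, 4, 3, 2, 1, 0}
   reverses the bytes within a 64-bit vector and is matched here as
   VREV64.  */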
28165
28166static bool
28167arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28168{
28169  unsigned int i, j, diff, nelt = d->nelt;
28170  rtx (*gen)(rtx, rtx);
28171
28172  if (!d->one_vector_p)
28173    return false;
28174
28175  diff = d->perm[0];
28176  switch (diff)
28177    {
28178    case 7:
28179      switch (d->vmode)
28180	{
28181	case V16QImode: gen = gen_neon_vrev64v16qi; break;
28182	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
28183	default:
28184	  return false;
28185	}
28186      break;
28187    case 3:
28188      switch (d->vmode)
28189	{
28190	case V16QImode: gen = gen_neon_vrev32v16qi; break;
28191	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
28192	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
28193	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
28194	default:
28195	  return false;
28196	}
28197      break;
28198    case 1:
28199      switch (d->vmode)
28200	{
28201	case V16QImode: gen = gen_neon_vrev16v16qi; break;
28202	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
28203	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
28204	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
28205	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
28206	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
28207	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
28208	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
28209	default:
28210	  return false;
28211	}
28212      break;
28213    default:
28214      return false;
28215    }
28216
28217  for (i = 0; i < nelt ; i += diff + 1)
28218    for (j = 0; j <= diff; j += 1)
28219      {
	/* This is guaranteed to be true because the value of diff
	   is 7, 3 or 1, and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these implies that something is
	   wrong by the time we get here.  */
28225	gcc_assert (i + j < nelt);
28226	if (d->perm[i + j] != i + diff - j)
28227	  return false;
28228      }
28229
28230  /* Success! */
28231  if (d->testing_p)
28232    return true;
28233
28234  emit_insn (gen (d->target, d->op0));
28235  return true;
28236}
28237
28238/* Recognize patterns for the VTRN insns.  */
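/* E.g. (illustrative) a V4SImode selector of {0, 4, 2, 6} or {1, 5, 3, 7}
   transposes pairs of elements from the two operands and is matched here
   as a VTRN.  */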
28239
28240static bool
28241arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28242{
28243  unsigned int i, odd, mask, nelt = d->nelt;
28244  rtx out0, out1, in0, in1, x;
28245  rtx (*gen)(rtx, rtx, rtx, rtx);
28246
28247  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28248    return false;
28249
28250  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
28251  if (d->perm[0] == 0)
28252    odd = 0;
28253  else if (d->perm[0] == 1)
28254    odd = 1;
28255  else
28256    return false;
28257  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28258
28259  for (i = 0; i < nelt; i += 2)
28260    {
28261      if (d->perm[i] != i + odd)
28262	return false;
28263      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28264	return false;
28265    }
28266
28267  /* Success!  */
28268  if (d->testing_p)
28269    return true;
28270
28271  switch (d->vmode)
28272    {
28273    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28274    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
28275    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
28276    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
28277    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
28278    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
28279    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
28280    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
28281    default:
28282      gcc_unreachable ();
28283    }
28284
28285  in0 = d->op0;
28286  in1 = d->op1;
28287  if (BYTES_BIG_ENDIAN)
28288    {
28289      x = in0, in0 = in1, in1 = x;
28290      odd = !odd;
28291    }
28292
28293  out0 = d->target;
28294  out1 = gen_reg_rtx (d->vmode);
28295  if (odd)
28296    x = out0, out0 = out1, out1 = x;
28297
28298  emit_insn (gen (out0, in0, in1, out1));
28299  return true;
28300}
28301
28302/* Recognize patterns for the VEXT insns.  */
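/* E.g. (illustrative) a V4SImode selector of {1, 2, 3, 4} extracts four
   consecutive elements starting one element into the concatenation of the
   two operands and is matched here as a VEXT with offset #1.  */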
28303
28304static bool
28305arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28306{
28307  unsigned int i, nelt = d->nelt;
28308  rtx (*gen) (rtx, rtx, rtx, rtx);
28309  rtx offset;
28310
28311  unsigned int location;
28312
28313  unsigned int next  = d->perm[0] + 1;
28314
28315  /* TODO: Handle GCC's numbering of elements for big-endian.  */
28316  if (BYTES_BIG_ENDIAN)
28317    return false;
28318
28319  /* Check if the extracted indexes are increasing by one.  */
28320  for (i = 1; i < nelt; next++, i++)
28321    {
28322      /* If we hit the most significant element of the 2nd vector in
28323	 the previous iteration, no need to test further.  */
28324      if (next == 2 * nelt)
28325	return false;
28326
28327      /* If we are operating on only one vector: it could be a
28328	 rotation.  If there are only two elements of size < 64, let
28329	 arm_evpc_neon_vrev catch it.  */
28330      if (d->one_vector_p && (next == nelt))
28331	{
28332	  if ((nelt == 2) && (d->vmode != V2DImode))
28333	    return false;
28334	  else
28335	    next = 0;
28336	}
28337
28338      if (d->perm[i] != next)
28339	return false;
28340    }
28341
28342  location = d->perm[0];
28343
28344  switch (d->vmode)
28345    {
28346    case V16QImode: gen = gen_neon_vextv16qi; break;
28347    case V8QImode: gen = gen_neon_vextv8qi; break;
28348    case V4HImode: gen = gen_neon_vextv4hi; break;
28349    case V8HImode: gen = gen_neon_vextv8hi; break;
28350    case V2SImode: gen = gen_neon_vextv2si; break;
28351    case V4SImode: gen = gen_neon_vextv4si; break;
28352    case V2SFmode: gen = gen_neon_vextv2sf; break;
28353    case V4SFmode: gen = gen_neon_vextv4sf; break;
28354    case V2DImode: gen = gen_neon_vextv2di; break;
28355    default:
28356      return false;
28357    }
28358
28359  /* Success! */
28360  if (d->testing_p)
28361    return true;
28362
28363  offset = GEN_INT (location);
28364  emit_insn (gen (d->target, d->op0, d->op1, offset));
28365  return true;
28366}
28367
/* The NEON VTBL instruction is a fully variable permutation that's even
28369   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
28370   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
28371   can do slightly better by expanding this as a constant where we don't
28372   have to apply a mask.  */
28373
28374static bool
28375arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28376{
28377  rtx rperm[MAX_VECT_LEN], sel;
28378  machine_mode vmode = d->vmode;
28379  unsigned int i, nelt = d->nelt;
28380
28381  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
28382     numbering of elements for big-endian, we must reverse the order.  */
28383  if (BYTES_BIG_ENDIAN)
28384    return false;
28385
28386  if (d->testing_p)
28387    return true;
28388
28389  /* Generic code will try constant permutation twice.  Once with the
28390     original mode and again with the elements lowered to QImode.
28391     So wait and don't do the selector expansion ourselves.  */
28392  if (vmode != V8QImode && vmode != V16QImode)
28393    return false;
28394
28395  for (i = 0; i < nelt; ++i)
28396    rperm[i] = GEN_INT (d->perm[i]);
28397  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28398  sel = force_reg (vmode, sel);
28399
28400  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28401  return true;
28402}
28403
28404static bool
28405arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28406{
28407  /* Check if the input mask matches vext before reordering the
28408     operands.  */
28409  if (TARGET_NEON)
28410    if (arm_evpc_neon_vext (d))
28411      return true;
28412
28413  /* The pattern matching functions above are written to look for a small
28414     number to begin the sequence (0, 1, N/2).  If we begin with an index
28415     from the second operand, we can swap the operands.  */
28416  if (d->perm[0] >= d->nelt)
28417    {
28418      unsigned i, nelt = d->nelt;
28419      rtx x;
28420
28421      for (i = 0; i < nelt; ++i)
28422	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28423
28424      x = d->op0;
28425      d->op0 = d->op1;
28426      d->op1 = x;
28427    }
28428
28429  if (TARGET_NEON)
28430    {
28431      if (arm_evpc_neon_vuzp (d))
28432	return true;
28433      if (arm_evpc_neon_vzip (d))
28434	return true;
28435      if (arm_evpc_neon_vrev (d))
28436	return true;
28437      if (arm_evpc_neon_vtrn (d))
28438	return true;
28439      return arm_evpc_neon_vtbl (d);
28440    }
28441  return false;
28442}
28443
28444/* Expand a vec_perm_const pattern.  */
28445
28446bool
28447arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28448{
28449  struct expand_vec_perm_d d;
28450  int i, nelt, which;
28451
28452  d.target = target;
28453  d.op0 = op0;
28454  d.op1 = op1;
28455
28456  d.vmode = GET_MODE (target);
28457  gcc_assert (VECTOR_MODE_P (d.vmode));
28458  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28459  d.testing_p = false;
28460
28461  for (i = which = 0; i < nelt; ++i)
28462    {
28463      rtx e = XVECEXP (sel, 0, i);
28464      int ei = INTVAL (e) & (2 * nelt - 1);
28465      which |= (ei < nelt ? 1 : 2);
28466      d.perm[i] = ei;
28467    }
28468
28469  switch (which)
28470    {
28471    default:
28472      gcc_unreachable();
28473
28474    case 3:
28475      d.one_vector_p = false;
28476      if (!rtx_equal_p (op0, op1))
28477	break;
28478
28479      /* The elements of PERM do not suggest that only the first operand
28480	 is used, but both operands are identical.  Allow easier matching
28481	 of the permutation by folding the permutation into the single
28482	 input vector.  */
28483      /* FALLTHRU */
28484    case 2:
28485      for (i = 0; i < nelt; ++i)
28486        d.perm[i] &= nelt - 1;
28487      d.op0 = op1;
28488      d.one_vector_p = true;
28489      break;
28490
28491    case 1:
28492      d.op1 = op0;
28493      d.one_vector_p = true;
28494      break;
28495    }
28496
28497  return arm_expand_vec_perm_const_1 (&d);
28498}
28499
28500/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
28501
28502static bool
28503arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28504				 const unsigned char *sel)
28505{
28506  struct expand_vec_perm_d d;
28507  unsigned int i, nelt, which;
28508  bool ret;
28509
28510  d.vmode = vmode;
28511  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28512  d.testing_p = true;
28513  memcpy (d.perm, sel, nelt);
28514
28515  /* Categorize the set of elements in the selector.  */
28516  for (i = which = 0; i < nelt; ++i)
28517    {
28518      unsigned char e = d.perm[i];
28519      gcc_assert (e < 2 * nelt);
28520      which |= (e < nelt ? 1 : 2);
28521    }
28522
28523  /* For all elements from second vector, fold the elements to first.  */
28524  if (which == 2)
28525    for (i = 0; i < nelt; ++i)
28526      d.perm[i] -= nelt;
28527
28528  /* Check whether the mask can be applied to the vector type.  */
28529  d.one_vector_p = (which != 3);
28530
28531  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28532  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28533  if (!d.one_vector_p)
28534    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28535
28536  start_sequence ();
28537  ret = arm_expand_vec_perm_const_1 (&d);
28538  end_sequence ();
28539
28540  return ret;
28541}
28542
28543bool
28544arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28545{
  /* If we are soft-float and either have LDRD or the mode fits in a
     single word, then all auto-increment forms are OK.  */
28548  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28549    return true;
28550
28551  switch (code)
28552    {
      /* Post-increment and pre-decrement are supported for all
	 instruction forms except for vector forms.  */
28555    case ARM_POST_INC:
28556    case ARM_PRE_DEC:
28557      if (VECTOR_MODE_P (mode))
28558	{
28559	  if (code != ARM_PRE_DEC)
28560	    return true;
28561	  else
28562	    return false;
28563	}
28564
28565      return true;
28566
28567    case ARM_POST_DEC:
28568    case ARM_PRE_INC:
      /* Without LDRD, and with a mode size greater than the word size,
	 there is no point in auto-incrementing because ldm and stm will
	 not have these forms.  */
28572      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28573	return false;
28574
28575      /* Vector and floating point modes do not support
28576	 these auto increment forms.  */
28577      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28578	return false;
28579
28580      return true;
28581
28582    default:
28583      return false;
28584
28585    }
28586
28587  return false;
28588}
28589
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
28592   Additionally, the default expansion code is not available or suitable
28593   for post-reload insn splits (this can occur when the register allocator
28594   chooses not to do a shift in NEON).
28595
28596   This function is used in both initial expand and post-reload splits, and
28597   handles all kinds of 64-bit shifts.
28598
28599   Input requirements:
28600    - It is safe for the input and output to be the same register, but
28601      early-clobber rules apply for the shift amount and scratch registers.
28602    - Shift by register requires both scratch registers.  In all other cases
28603      the scratch registers may be NULL.
28604    - Ashiftrt by a register also clobbers the CC register.  */
28605void
28606arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28607			       rtx amount, rtx scratch1, rtx scratch2)
28608{
28609  rtx out_high = gen_highpart (SImode, out);
28610  rtx out_low = gen_lowpart (SImode, out);
28611  rtx in_high = gen_highpart (SImode, in);
28612  rtx in_low = gen_lowpart (SImode, in);
28613
28614  /* Terminology:
28615	in = the register pair containing the input value.
28616	out = the destination register pair.
28617	up = the high- or low-part of each pair.
28618	down = the opposite part to "up".
28619     In a shift, we can consider bits to shift from "up"-stream to
28620     "down"-stream, so in a left-shift "up" is the low-part and "down"
28621     is the high-part of each register pair.  */
28622
28623  rtx out_up   = code == ASHIFT ? out_low : out_high;
28624  rtx out_down = code == ASHIFT ? out_high : out_low;
28625  rtx in_up   = code == ASHIFT ? in_low : in_high;
28626  rtx in_down = code == ASHIFT ? in_high : in_low;
28627
28628  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28629  gcc_assert (out
28630	      && (REG_P (out) || GET_CODE (out) == SUBREG)
28631	      && GET_MODE (out) == DImode);
28632  gcc_assert (in
28633	      && (REG_P (in) || GET_CODE (in) == SUBREG)
28634	      && GET_MODE (in) == DImode);
28635  gcc_assert (amount
28636	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28637		   && GET_MODE (amount) == SImode)
28638		  || CONST_INT_P (amount)));
28639  gcc_assert (scratch1 == NULL
28640	      || (GET_CODE (scratch1) == SCRATCH)
28641	      || (GET_MODE (scratch1) == SImode
28642		  && REG_P (scratch1)));
28643  gcc_assert (scratch2 == NULL
28644	      || (GET_CODE (scratch2) == SCRATCH)
28645	      || (GET_MODE (scratch2) == SImode
28646		  && REG_P (scratch2)));
28647  gcc_assert (!REG_P (out) || !REG_P (amount)
28648	      || !HARD_REGISTER_P (out)
28649	      || (REGNO (out) != REGNO (amount)
28650		  && REGNO (out) + 1 != REGNO (amount)));
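  /* If OUT is a hard register it must not overlap a register AMOUNT,
     because part of OUT is written before AMOUNT is read for the last
     time.  */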
28651
28652  /* Macros to make following code more readable.  */
28653  #define SUB_32(DEST,SRC) \
28654	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28655  #define RSB_32(DEST,SRC) \
28656	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28657  #define SUB_S_32(DEST,SRC) \
28658	    gen_addsi3_compare0 ((DEST), (SRC), \
28659				 GEN_INT (-32))
28660  #define SET(DEST,SRC) \
28661	    gen_rtx_SET (SImode, (DEST), (SRC))
28662  #define SHIFT(CODE,SRC,AMOUNT) \
28663	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28664  #define LSHIFT(CODE,SRC,AMOUNT) \
28665	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28666			    SImode, (SRC), (AMOUNT))
28667  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28668	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28669			    SImode, (SRC), (AMOUNT))
28670  #define ORR(A,B) \
28671	    gen_rtx_IOR (SImode, (A), (B))
28672  #define BRANCH(COND,LABEL) \
28673	    gen_arm_cond_branch ((LABEL), \
28674				 gen_rtx_ ## COND (CCmode, cc_reg, \
28675						   const0_rtx), \
28676				 cc_reg)
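  /* Note that BRANCH refers to the cc_reg variable declared in the
     shift-by-register case below.  */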
28677
28678  /* Shifts by register and shifts by constant are handled separately.  */
28679  if (CONST_INT_P (amount))
28680    {
28681      /* We have a shift-by-constant.  */
28682
28683      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
28689      if (INTVAL (amount) <= 0)
28690	emit_insn (gen_movdi (out, in));
28691      else if (INTVAL (amount) >= 64)
28692	{
28693	  if (code == ASHIFTRT)
28694	    {
28695	      rtx const31_rtx = GEN_INT (31);
28696	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28697	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28698	    }
28699	  else
28700	    emit_insn (gen_movdi (out, const0_rtx));
28701	}
28702
28703      /* Now handle valid shifts. */
28704      else if (INTVAL (amount) < 32)
28705	{
28706	  /* Shifts by a constant less than 32.  */
28707	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28708
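	  /* For example, a left shift by N (0 < N < 32) is emitted as:
		out_high = in_high << N;
		out_high |= (unsigned) in_low >> (32 - N);
		out_low = in_low << N;
	     and the right shifts are the mirror image.  */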
28709	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28710	  emit_insn (SET (out_down,
28711			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
28712			       out_down)));
28713	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28714	}
28715      else
28716	{
28717	  /* Shifts by a constant greater than 31.  */
28718	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28719
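	  /* Whatever remains of the value comes from the "up" input word,
	     shifted into the "down" output word; the "up" output word is
	     zero, or copies of the sign bit for an arithmetic right
	     shift.  */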
28720	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28721	  if (code == ASHIFTRT)
28722	    emit_insn (gen_ashrsi3 (out_up, in_up,
28723				    GEN_INT (31)));
28724	  else
28725	    emit_insn (SET (out_up, const0_rtx));
28726	}
28727    }
28728  else
28729    {
28730      /* We have a shift-by-register.  */
28731      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28732
28733      /* This alternative requires the scratch registers.  */
28734      gcc_assert (scratch1 && REG_P (scratch1));
28735      gcc_assert (scratch2 && REG_P (scratch2));
28736
28737      /* We will need the values "amount-32" and "32-amount" later.
28738         Swapping them around now allows the later code to be more general. */
28739      switch (code)
28740	{
28741	case ASHIFT:
28742	  emit_insn (SUB_32 (scratch1, amount));
28743	  emit_insn (RSB_32 (scratch2, amount));
28744	  break;
28745	case ASHIFTRT:
28746	  emit_insn (RSB_32 (scratch1, amount));
28747	  /* Also set CC = amount > 32.  */
28748	  emit_insn (SUB_S_32 (scratch2, amount));
28749	  break;
28750	case LSHIFTRT:
28751	  emit_insn (RSB_32 (scratch1, amount));
28752	  emit_insn (SUB_32 (scratch2, amount));
28753	  break;
28754	default:
28755	  gcc_unreachable ();
28756	}
28757
28758      /* Emit code like this:
28759
28760	 arithmetic-left:
28761	    out_down = in_down << amount;
28762	    out_down = (in_up << (amount - 32)) | out_down;
28763	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28764	    out_up = in_up << amount;
28765
	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;
28779
28780	  The ARM and Thumb2 variants are the same but implemented slightly
28781	  differently.  If this were only called during expand we could just
28782	  use the Thumb2 case and let combine do the right thing, but this
28783	  can also be called from post-reload splitters.  */
28784
28785      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28786
28787      if (!TARGET_THUMB2)
28788	{
28789	  /* Emit code for ARM mode.  */
28790	  emit_insn (SET (out_down,
28791			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28792	  if (code == ASHIFTRT)
28793	    {
28794	      rtx_code_label *done_label = gen_label_rtx ();
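	      /* The flags were set by SUB_S_32 above (scratch2 = amount - 32).
		 Skip the arithmetic-shift term when amount < 32: an ASR by
		 scratch2 would then be all copies of the sign bit and must
		 not be ORed in.  */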
28795	      emit_jump_insn (BRANCH (LT, done_label));
28796	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28797					     out_down)));
28798	      emit_label (done_label);
28799	    }
28800	  else
28801	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28802					   out_down)));
28803	}
28804      else
28805	{
28806	  /* Emit code for Thumb2 mode.
28807	     Thumb2 can't do shift and or in one insn.  */
28808	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28809	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28810
28811	  if (code == ASHIFTRT)
28812	    {
28813	      rtx_code_label *done_label = gen_label_rtx ();
28814	      emit_jump_insn (BRANCH (LT, done_label));
28815	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28816	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
28817	      emit_label (done_label);
28818	    }
28819	  else
28820	    {
28821	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28822	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28823	    }
28824	}
28825
28826      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28827    }
28828
28829  #undef SUB_32
28830  #undef RSB_32
28831  #undef SUB_S_32
28832  #undef SET
28833  #undef SHIFT
28834  #undef LSHIFT
28835  #undef REV_LSHIFT
28836  #undef ORR
28837  #undef BRANCH
28838}
28839
28840/* Returns true if the pattern is a valid symbolic address, which is either a
28841   symbol_ref or (symbol_ref + addend).
28842
28843   According to the ARM ELF ABI, the initial addend of REL-type relocations
28844   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
28845   literal field of the instruction as a 16-bit signed value in the range
28846   -32768 <= A < 32768.  */
28847
28848bool
28849arm_valid_symbolic_address_p (rtx addr)
28850{
28851  rtx xop0, xop1 = NULL_RTX;
28852  rtx tmp = addr;
28853
28854  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
28855    return true;
28856
28857  /* (const (plus: symbol_ref const_int))  */
28858  if (GET_CODE (addr) == CONST)
28859    tmp = XEXP (addr, 0);
28860
28861  if (GET_CODE (tmp) == PLUS)
28862    {
28863      xop0 = XEXP (tmp, 0);
28864      xop1 = XEXP (tmp, 1);
28865
28866      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
28867	  return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
28868    }
28869
28870  return false;
28871}
28872
/* Returns true if *COMPARISON is a valid comparison operation, and
   puts the operands into a form that is valid.  */
28875bool
28876arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28877{
28878  enum rtx_code code = GET_CODE (*comparison);
28879  int code_int;
28880  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28881    ? GET_MODE (*op2) : GET_MODE (*op1);
28882
28883  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28884
28885  if (code == UNEQ || code == LTGT)
28886    return false;
28887
28888  code_int = (int)code;
28889  arm_canonicalize_comparison (&code_int, op1, op2, 0);
28890  PUT_CODE (*comparison, (enum rtx_code)code_int);
28891
28892  switch (mode)
28893    {
28894    case SImode:
28895      if (!arm_add_operand (*op1, mode))
28896	*op1 = force_reg (mode, *op1);
28897      if (!arm_add_operand (*op2, mode))
28898	*op2 = force_reg (mode, *op2);
28899      return true;
28900
28901    case DImode:
28902      if (!cmpdi_operand (*op1, mode))
28903	*op1 = force_reg (mode, *op1);
28904      if (!cmpdi_operand (*op2, mode))
28905	*op2 = force_reg (mode, *op2);
28906      return true;
28907
28908    case SFmode:
28909    case DFmode:
28910      if (!arm_float_compare_operand (*op1, mode))
28911	*op1 = force_reg (mode, *op1);
28912      if (!arm_float_compare_operand (*op2, mode))
28913	*op2 = force_reg (mode, *op2);
28914      return true;
28915    default:
28916      break;
28917    }
28918
28919  return false;
28920
28921}
28922
/* Maximum number of instructions to use when setting a block of memory.  */
28924static int
28925arm_block_set_max_insns (void)
28926{
28927  if (optimize_function_for_size_p (cfun))
28928    return 4;
28929  else
28930    return current_tune->max_insns_inline_memset;
28931}
28932
/* Return TRUE if it's profitable to set a block of memory for the
   non-vectorized case.  VAL is the value to set the memory
28935   with.  LENGTH is the number of bytes to set.  ALIGN is the
28936   alignment of the destination memory in bytes.  UNALIGNED_P
28937   is TRUE if we can only set the memory with instructions
28938   meeting alignment requirements.  USE_STRD_P is TRUE if we
28939   can use strd to set the memory.  */
28940static bool
28941arm_block_set_non_vect_profit_p (rtx val,
28942				 unsigned HOST_WIDE_INT length,
28943				 unsigned HOST_WIDE_INT align,
28944				 bool unaligned_p, bool use_strd_p)
28945{
28946  int num = 0;
  /* For a leftover of 0-7 bytes, the memory block can be set using
     strb/strh/str with the minimum number of instructions.  */
28949  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
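  /* For example, leftover[7] == 3: a 7-byte tail takes str + strh + strb.  */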
28950
28951  if (unaligned_p)
28952    {
28953      num = arm_const_inline_cost (SET, val);
28954      num += length / align + length % align;
28955    }
28956  else if (use_strd_p)
28957    {
28958      num = arm_const_double_inline_cost (val);
28959      num += (length >> 3) + leftover[length & 7];
28960    }
28961  else
28962    {
28963      num = arm_const_inline_cost (SET, val);
28964      num += (length >> 2) + leftover[length & 3];
28965    }
28966
28967  /* We may be able to combine last pair STRH/STRB into a single STR
28968     by shifting one byte back.  */
28969  if (unaligned_access && length > 3 && (length & 3) == 3)
28970    num--;
28971
28972  return (num <= arm_block_set_max_insns ());
28973}
28974
/* Return TRUE if it's profitable to set a block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.
28977   ALIGN is the alignment of destination memory in bytes.
28978   MODE is the vector mode used to set the memory.  */
28979static bool
28980arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28981			     unsigned HOST_WIDE_INT align,
28982			     machine_mode mode)
28983{
28984  int num;
28985  bool unaligned_p = ((align & 3) != 0);
28986  unsigned int nelt = GET_MODE_NUNITS (mode);
28987
28988  /* Instruction loading constant value.  */
28989  num = 1;
28990  /* Instructions storing the memory.  */
28991  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  We only need to
     adjust the address if the destination is 4-byte aligned and the
     leftover bytes can only be stored by a misaligned store instruction.  */
28995  if (!unaligned_p && (length & 3) != 0)
28996    num++;
28997
28998  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
28999  if (!unaligned_p && mode == V16QImode)
29000    num--;
29001
29002  return (num <= arm_block_set_max_insns ());
29003}
29004
29005/* Set a block of memory using vectorization instructions for the
29006   unaligned case.  We fill the first LENGTH bytes of the memory
29007   area starting from DSTBASE with byte constant VALUE.  ALIGN is
29008   the alignment requirement of memory.  Return TRUE if succeeded.  */
29009static bool
29010arm_block_set_unaligned_vect (rtx dstbase,
29011			      unsigned HOST_WIDE_INT length,
29012			      unsigned HOST_WIDE_INT value,
29013			      unsigned HOST_WIDE_INT align)
29014{
29015  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29016  rtx dst, mem;
29017  rtx val_elt, val_vec, reg;
29018  rtx rval[MAX_VECT_LEN];
29019  rtx (*gen_func) (rtx, rtx);
29020  machine_mode mode;
29021  unsigned HOST_WIDE_INT v = value;
29022  unsigned int offset = 0;
29023  gcc_assert ((align & 0x3) != 0);
29024  nelt_v8 = GET_MODE_NUNITS (V8QImode);
29025  nelt_v16 = GET_MODE_NUNITS (V16QImode);
29026  if (length >= nelt_v16)
29027    {
29028      mode = V16QImode;
29029      gen_func = gen_movmisalignv16qi;
29030    }
29031  else
29032    {
29033      mode = V8QImode;
29034      gen_func = gen_movmisalignv8qi;
29035    }
29036  nelt_mode = GET_MODE_NUNITS (mode);
29037  gcc_assert (length >= nelt_mode);
29038  /* Skip if it isn't profitable.  */
29039  if (!arm_block_set_vect_profit_p (length, align, mode))
29040    return false;
29041
29042  dst = copy_addr_to_reg (XEXP (dstbase, 0));
29043  mem = adjust_automodify_address (dstbase, mode, dst, offset);
29044
29045  v = sext_hwi (v, BITS_PER_WORD);
29046  val_elt = GEN_INT (v);
29047  for (j = 0; j < nelt_mode; j++)
29048    rval[j] = val_elt;
29049
29050  reg = gen_reg_rtx (mode);
29051  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29052  /* Emit instruction loading the constant value.  */
29053  emit_move_insn (reg, val_vec);
29054
29055  /* Handle nelt_mode bytes in a vector.  */
29056  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29057    {
29058      emit_insn ((*gen_func) (mem, reg));
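      /* Only advance the address if another full vector store follows;
	 the leftover code below does its own address adjustment.  */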
29059      if (i + 2 * nelt_mode <= length)
29060	{
29061	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29062	  offset += nelt_mode;
29063	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
29064	}
29065    }
29066
29067  /* If there are not less than nelt_v8 bytes leftover, we must be in
29068     V16QI mode.  */
29069  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29070
29071  /* Handle (8, 16) bytes leftover.  */
29072  if (i + nelt_v8 < length)
29073    {
29074      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29075      offset += length - i;
29076      mem = adjust_automodify_address (dstbase, mode, dst, offset);
29077
29078      /* We are shifting bytes back, set the alignment accordingly.  */
29079      if ((length & 1) != 0 && align >= 2)
29080	set_mem_align (mem, BITS_PER_UNIT);
29081
29082      emit_insn (gen_movmisalignv16qi (mem, reg));
29083    }
29084  /* Handle (0, 8] bytes leftover.  */
29085  else if (i < length && i + nelt_v8 >= length)
29086    {
29087      if (mode == V16QImode)
29088	reg = gen_lowpart (V8QImode, reg);
29089
29090      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29091					      + (nelt_mode - nelt_v8))));
29092      offset += (length - i) + (nelt_mode - nelt_v8);
29093      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29094
29095      /* We are shifting bytes back, set the alignment accordingly.  */
29096      if ((length & 1) != 0 && align >= 2)
29097	set_mem_align (mem, BITS_PER_UNIT);
29098
29099      emit_insn (gen_movmisalignv8qi (mem, reg));
29100    }
29101
29102  return true;
29103}
29104
29105/* Set a block of memory using vectorization instructions for the
29106   aligned case.  We fill the first LENGTH bytes of the memory area
29107   starting from DSTBASE with byte constant VALUE.  ALIGN is the
29108   alignment requirement of memory.  Return TRUE if succeeded.  */
29109static bool
29110arm_block_set_aligned_vect (rtx dstbase,
29111			    unsigned HOST_WIDE_INT length,
29112			    unsigned HOST_WIDE_INT value,
29113			    unsigned HOST_WIDE_INT align)
29114{
29115  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29116  rtx dst, addr, mem;
29117  rtx val_elt, val_vec, reg;
29118  rtx rval[MAX_VECT_LEN];
29119  machine_mode mode;
29120  unsigned HOST_WIDE_INT v = value;
29121  unsigned int offset = 0;
29122
29123  gcc_assert ((align & 0x3) == 0);
29124  nelt_v8 = GET_MODE_NUNITS (V8QImode);
29125  nelt_v16 = GET_MODE_NUNITS (V16QImode);
29126  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29127    mode = V16QImode;
29128  else
29129    mode = V8QImode;
29130
29131  nelt_mode = GET_MODE_NUNITS (mode);
29132  gcc_assert (length >= nelt_mode);
29133  /* Skip if it isn't profitable.  */
29134  if (!arm_block_set_vect_profit_p (length, align, mode))
29135    return false;
29136
29137  dst = copy_addr_to_reg (XEXP (dstbase, 0));
29138
29139  v = sext_hwi (v, BITS_PER_WORD);
29140  val_elt = GEN_INT (v);
29141  for (j = 0; j < nelt_mode; j++)
29142    rval[j] = val_elt;
29143
29144  reg = gen_reg_rtx (mode);
29145  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29146  /* Emit instruction loading the constant value.  */
29147  emit_move_insn (reg, val_vec);
29148
29149  i = 0;
29150  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
29151  if (mode == V16QImode)
29152    {
29153      mem = adjust_automodify_address (dstbase, mode, dst, offset);
29154      emit_insn (gen_movmisalignv16qi (mem, reg));
29155      i += nelt_mode;
29156      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
29157      if (i + nelt_v8 < length && i + nelt_v16 > length)
29158	{
29159	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29160	  offset += length - nelt_mode;
29161	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
29162	  /* We are shifting bytes back, set the alignment accordingly.  */
29163	  if ((length & 0x3) == 0)
29164	    set_mem_align (mem, BITS_PER_UNIT * 4);
29165	  else if ((length & 0x1) == 0)
29166	    set_mem_align (mem, BITS_PER_UNIT * 2);
29167	  else
29168	    set_mem_align (mem, BITS_PER_UNIT);
29169
29170	  emit_insn (gen_movmisalignv16qi (mem, reg));
29171	  return true;
29172	}
29173      /* Fall through for bytes leftover.  */
29174      mode = V8QImode;
29175      nelt_mode = GET_MODE_NUNITS (mode);
29176      reg = gen_lowpart (V8QImode, reg);
29177    }
29178
29179  /* Handle 8 bytes in a vector.  */
29180  for (; (i + nelt_mode <= length); i += nelt_mode)
29181    {
29182      addr = plus_constant (Pmode, dst, i);
29183      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29184      emit_move_insn (mem, reg);
29185    }
29186
29187  /* Handle single word leftover by shifting 4 bytes back.  We can
29188     use aligned access for this case.  */
29189  if (i + UNITS_PER_WORD == length)
29190    {
29191      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29192      offset += i - UNITS_PER_WORD;
29193      mem = adjust_automodify_address (dstbase, mode, addr, offset);
29194      /* We are shifting 4 bytes back, set the alignment accordingly.  */
29195      if (align > UNITS_PER_WORD)
29196	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29197
29198      emit_move_insn (mem, reg);
29199    }
29200  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29201     We have to use unaligned access for this case.  */
29202  else if (i < length)
29203    {
29204      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29205      offset += length - nelt_mode;
29206      mem = adjust_automodify_address (dstbase, mode, dst, offset);
29207      /* We are shifting bytes back, set the alignment accordingly.  */
29208      if ((length & 1) == 0)
29209	set_mem_align (mem, BITS_PER_UNIT * 2);
29210      else
29211	set_mem_align (mem, BITS_PER_UNIT);
29212
29213      emit_insn (gen_movmisalignv8qi (mem, reg));
29214    }
29215
29216  return true;
29217}
29218
/* Set a block of memory using plain strh/strb instructions, using
   only instructions permitted by the alignment ALIGN.  We fill the
29221   first LENGTH bytes of the memory area starting from DSTBASE
29222   with byte constant VALUE.  ALIGN is the alignment requirement
29223   of memory.  */
29224static bool
29225arm_block_set_unaligned_non_vect (rtx dstbase,
29226				  unsigned HOST_WIDE_INT length,
29227				  unsigned HOST_WIDE_INT value,
29228				  unsigned HOST_WIDE_INT align)
29229{
29230  unsigned int i;
29231  rtx dst, addr, mem;
29232  rtx val_exp, val_reg, reg;
29233  machine_mode mode;
29234  HOST_WIDE_INT v = value;
29235
29236  gcc_assert (align == 1 || align == 2);
29237
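  /* With 2-byte alignment we store halfwords, so replicate the byte into
     both halves of the value first.  */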
29238  if (align == 2)
29239    v |= (value << BITS_PER_UNIT);
29240
29241  v = sext_hwi (v, BITS_PER_WORD);
29242  val_exp = GEN_INT (v);
29243  /* Skip if it isn't profitable.  */
29244  if (!arm_block_set_non_vect_profit_p (val_exp, length,
29245					align, true, false))
29246    return false;
29247
29248  dst = copy_addr_to_reg (XEXP (dstbase, 0));
29249  mode = (align == 2 ? HImode : QImode);
29250  val_reg = force_reg (SImode, val_exp);
29251  reg = gen_lowpart (mode, val_reg);
29252
29253  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29254    {
29255      addr = plus_constant (Pmode, dst, i);
29256      mem = adjust_automodify_address (dstbase, mode, addr, i);
29257      emit_move_insn (mem, reg);
29258    }
29259
29260  /* Handle single byte leftover.  */
29261  if (i + 1 == length)
29262    {
29263      reg = gen_lowpart (QImode, val_reg);
29264      addr = plus_constant (Pmode, dst, i);
29265      mem = adjust_automodify_address (dstbase, QImode, addr, i);
29266      emit_move_insn (mem, reg);
29267      i++;
29268    }
29269
29270  gcc_assert (i == length);
29271  return true;
29272}
29273
29274/* Set a block of memory using plain strd/str/strh/strb instructions,
29275   to permit unaligned copies on processors which support unaligned
29276   semantics for those instructions.  We fill the first LENGTH bytes
29277   of the memory area starting from DSTBASE with byte constant VALUE.
29278   ALIGN is the alignment requirement of memory.  */
29279static bool
29280arm_block_set_aligned_non_vect (rtx dstbase,
29281				unsigned HOST_WIDE_INT length,
29282				unsigned HOST_WIDE_INT value,
29283				unsigned HOST_WIDE_INT align)
29284{
29285  unsigned int i;
29286  rtx dst, addr, mem;
29287  rtx val_exp, val_reg, reg;
29288  unsigned HOST_WIDE_INT v;
29289  bool use_strd_p;
29290
29291  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29292		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
29293
29294  v = (value | (value << 8) | (value << 16) | (value << 24));
29295  if (length < UNITS_PER_WORD)
29296    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29297
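  /* For strd the 32-bit pattern must be replicated across the whole
     double word; otherwise sign-extend it so the CONST_INT is in
     canonical sign-extended form.  */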
29298  if (use_strd_p)
29299    v |= (v << BITS_PER_WORD);
29300  else
29301    v = sext_hwi (v, BITS_PER_WORD);
29302
29303  val_exp = GEN_INT (v);
29304  /* Skip if it isn't profitable.  */
29305  if (!arm_block_set_non_vect_profit_p (val_exp, length,
29306					align, false, use_strd_p))
29307    {
29308      if (!use_strd_p)
29309	return false;
29310
29311      /* Try without strd.  */
29312      v = (v >> BITS_PER_WORD);
29313      v = sext_hwi (v, BITS_PER_WORD);
29314      val_exp = GEN_INT (v);
29315      use_strd_p = false;
29316      if (!arm_block_set_non_vect_profit_p (val_exp, length,
29317					    align, false, use_strd_p))
29318	return false;
29319    }
29320
29321  i = 0;
29322  dst = copy_addr_to_reg (XEXP (dstbase, 0));
29323  /* Handle double words using strd if possible.  */
29324  if (use_strd_p)
29325    {
29326      val_reg = force_reg (DImode, val_exp);
29327      reg = val_reg;
29328      for (; (i + 8 <= length); i += 8)
29329	{
29330	  addr = plus_constant (Pmode, dst, i);
29331	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
29332	  emit_move_insn (mem, reg);
29333	}
29334    }
29335  else
29336    val_reg = force_reg (SImode, val_exp);
29337
29338  /* Handle words.  */
29339  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29340  for (; (i + 4 <= length); i += 4)
29341    {
29342      addr = plus_constant (Pmode, dst, i);
29343      mem = adjust_automodify_address (dstbase, SImode, addr, i);
29344      if ((align & 3) == 0)
29345	emit_move_insn (mem, reg);
29346      else
29347	emit_insn (gen_unaligned_storesi (mem, reg));
29348    }
29349
29350  /* Merge last pair of STRH and STRB into a STR if possible.  */
29351  if (unaligned_access && i > 0 && (i + 3) == length)
29352    {
29353      addr = plus_constant (Pmode, dst, i - 1);
29354      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29355      /* We are shifting one byte back, set the alignment accordingly.  */
29356      if ((align & 1) == 0)
29357	set_mem_align (mem, BITS_PER_UNIT);
29358
29359      /* Most likely this is an unaligned access, and we can't tell at
29360	 compilation time.  */
29361      emit_insn (gen_unaligned_storesi (mem, reg));
29362      return true;
29363    }
29364
29365  /* Handle half word leftover.  */
29366  if (i + 2 <= length)
29367    {
29368      reg = gen_lowpart (HImode, val_reg);
29369      addr = plus_constant (Pmode, dst, i);
29370      mem = adjust_automodify_address (dstbase, HImode, addr, i);
29371      if ((align & 1) == 0)
29372	emit_move_insn (mem, reg);
29373      else
29374	emit_insn (gen_unaligned_storehi (mem, reg));
29375
29376      i += 2;
29377    }
29378
29379  /* Handle single byte leftover.  */
29380  if (i + 1 == length)
29381    {
29382      reg = gen_lowpart (QImode, val_reg);
29383      addr = plus_constant (Pmode, dst, i);
29384      mem = adjust_automodify_address (dstbase, QImode, addr, i);
29385      emit_move_insn (mem, reg);
29386    }
29387
29388  return true;
29389}
29390
29391/* Set a block of memory using vectorization instructions for both
29392   aligned and unaligned cases.  We fill the first LENGTH bytes of
29393   the memory area starting from DSTBASE with byte constant VALUE.
29394   ALIGN is the alignment requirement of memory.  */
29395static bool
29396arm_block_set_vect (rtx dstbase,
29397		    unsigned HOST_WIDE_INT length,
29398		    unsigned HOST_WIDE_INT value,
29399		    unsigned HOST_WIDE_INT align)
29400{
29401  /* Check whether we need to use unaligned store instruction.  */
29402  if (((align & 3) != 0 || (length & 3) != 0)
29403      /* Check whether unaligned store instruction is available.  */
29404      && (!unaligned_access || BYTES_BIG_ENDIAN))
29405    return false;
29406
29407  if ((align & 3) == 0)
29408    return arm_block_set_aligned_vect (dstbase, length, value, align);
29409  else
29410    return arm_block_set_unaligned_vect (dstbase, length, value, align);
29411}
29412
/* Expand a string store operation.  First we try to do it using
   vectorization instructions, then with ARM unaligned access and
   double-word stores if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory with, OPERANDS[3] is the known alignment of
   the destination.  */
29419bool
29420arm_gen_setmem (rtx *operands)
29421{
29422  rtx dstbase = operands[0];
29423  unsigned HOST_WIDE_INT length;
29424  unsigned HOST_WIDE_INT value;
29425  unsigned HOST_WIDE_INT align;
29426
29427  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29428    return false;
29429
29430  length = UINTVAL (operands[1]);
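  /* Only small blocks are expanded inline; returning false lets the
     generic code handle larger blocks.  */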
29431  if (length > 64)
29432    return false;
29433
29434  value = (UINTVAL (operands[2]) & 0xFF);
29435  align = UINTVAL (operands[3]);
29436  if (TARGET_NEON && length >= 8
29437      && current_tune->string_ops_prefer_neon
29438      && arm_block_set_vect (dstbase, length, value, align))
29439    return true;
29440
29441  if (!unaligned_access && (align & 3) != 0)
29442    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29443
29444  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29445}
29446
29447
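/* Return true if macro fusion is enabled for the current tuning.  */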
29448static bool
29449arm_macro_fusion_p (void)
29450{
29451  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
29452}
29453
29454
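/* Return true if insns PREV and CURR are a candidate pair for macro
   fusion, i.e. should be kept together by the scheduler.  */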
29455static bool
29456aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29457{
29458  rtx set_dest;
29459  rtx prev_set = single_set (prev);
29460  rtx curr_set = single_set (curr);
29461
29462  if (!prev_set
29463      || !curr_set)
29464    return false;
29465
29466  if (any_condjump_p (curr))
29467    return false;
29468
29469  if (!arm_macro_fusion_p ())
29470    return false;
29471
29472  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
29473    {
29474      /* We are trying to fuse
29475         movw imm / movt imm
29476         instructions as a group that gets scheduled together.  */
29477
29478      set_dest = SET_DEST (curr_set);
29479
29480      if (GET_MODE (set_dest) != SImode)
29481        return false;
29482
29483      /* We are trying to match:
29484         prev (movw)  == (set (reg r0) (const_int imm16))
29485         curr (movt) == (set (zero_extract (reg r0)
29486                                           (const_int 16)
29487                                           (const_int 16))
29488                             (const_int imm16_1))
29489         or
29490         prev (movw) == (set (reg r1)
29491                              (high (symbol_ref ("SYM"))))
29492         curr (movt) == (set (reg r0)
29493                             (lo_sum (reg r1)
29494                                     (symbol_ref ("SYM"))))  */
29495      if (GET_CODE (set_dest) == ZERO_EXTRACT)
29496        {
29497          if (CONST_INT_P (SET_SRC (curr_set))
29498              && CONST_INT_P (SET_SRC (prev_set))
29499              && REG_P (XEXP (set_dest, 0))
29500              && REG_P (SET_DEST (prev_set))
29501              && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29502            return true;
29503        }
29504      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29505               && REG_P (SET_DEST (curr_set))
29506               && REG_P (SET_DEST (prev_set))
29507               && GET_CODE (SET_SRC (prev_set)) == HIGH
29508               && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29509             return true;
29510    }
29511  return false;
29512}
29513
29514/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
29515
29516static unsigned HOST_WIDE_INT
29517arm_asan_shadow_offset (void)
29518{
29519  return (unsigned HOST_WIDE_INT) 1 << 29;
29520}
29521
29522
29523/* This is a temporary fix for PR60655.  Ideally we need
29524   to handle most of these cases in the generic part but
29525   currently we reject minus (..) (sym_ref).  We try to
29526   ameliorate the case with minus (sym_ref1) (sym_ref2)
29527   where they are in the same section.  */
29528
29529static bool
29530arm_const_not_ok_for_debug_p (rtx p)
29531{
29532  tree decl_op0 = NULL;
29533  tree decl_op1 = NULL;
29534
29535  if (GET_CODE (p) == MINUS)
29536    {
29537      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29538	{
29539	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29540	  if (decl_op1
29541	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29542	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29543	    {
29544	      if ((TREE_CODE (decl_op1) == VAR_DECL
29545		   || TREE_CODE (decl_op1) == CONST_DECL)
29546		  && (TREE_CODE (decl_op0) == VAR_DECL
29547		      || TREE_CODE (decl_op0) == CONST_DECL))
29548		return (get_variable_section (decl_op1, false)
29549			!= get_variable_section (decl_op0, false));
29550
29551	      if (TREE_CODE (decl_op1) == LABEL_DECL
29552		  && TREE_CODE (decl_op0) == LABEL_DECL)
29553		return (DECL_CONTEXT (decl_op1)
29554			!= DECL_CONTEXT (decl_op0));
29555	    }
29556
29557	  return true;
29558	}
29559    }
29560
29561  return false;
29562}
29563
/* Return TRUE if X is a reference to a value in a constant pool.  */
29565extern bool
29566arm_is_constant_pool_ref (rtx x)
29567{
29568  return (MEM_P (x)
29569	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29570	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29571}
29572
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and store them in BASE and OFFSET; otherwise
   return FALSE after clearing BASE and OFFSET.  */
29576
29577static bool
29578extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29579{
29580  rtx addr;
29581
29582  gcc_assert (MEM_P (mem));
29583
29584  addr = XEXP (mem, 0);
29585
29586  /* Strip off const from addresses like (const (addr)).  */
29587  if (GET_CODE (addr) == CONST)
29588    addr = XEXP (addr, 0);
29589
29590  if (GET_CODE (addr) == REG)
29591    {
29592      *base = addr;
29593      *offset = const0_rtx;
29594      return true;
29595    }
29596
29597  if (GET_CODE (addr) == PLUS
29598      && GET_CODE (XEXP (addr, 0)) == REG
29599      && CONST_INT_P (XEXP (addr, 1)))
29600    {
29601      *base = XEXP (addr, 0);
29602      *offset = XEXP (addr, 1);
29603      return true;
29604    }
29605
29606  *base = NULL_RTX;
29607  *offset = NULL_RTX;
29608
29609  return false;
29610}
29611
/* If INSN is a load or store whose address is in the form [base+offset],
   extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
   set to TRUE if it is a load.  Return TRUE if INSN is such an
   instruction, otherwise return FALSE.  */
29616
29617static bool
29618fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29619{
29620  rtx x, dest, src;
29621
29622  gcc_assert (INSN_P (insn));
29623  x = PATTERN (insn);
29624  if (GET_CODE (x) != SET)
29625    return false;
29626
29627  src = SET_SRC (x);
29628  dest = SET_DEST (x);
29629  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29630    {
29631      *is_load = false;
29632      extract_base_offset_in_addr (dest, base, offset);
29633    }
29634  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29635    {
29636      *is_load = true;
29637      extract_base_offset_in_addr (src, base, offset);
29638    }
29639  else
29640    return false;
29641
29642  return (*base != NULL_RTX && *offset != NULL_RTX);
29643}
29644
29645/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29646
   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.
29651
29652   It's important that irrelevant instructions get the largest FUSION_PRI.  */
29653
29654static void
29655arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29656			   int *fusion_pri, int *pri)
29657{
29658  int tmp, off_val;
29659  bool is_load;
29660  rtx base, offset;
29661
29662  gcc_assert (INSN_P (insn));
29663
29664  tmp = max_pri - 1;
29665  if (!fusion_load_store (insn, &base, &offset, &is_load))
29666    {
29667      *pri = tmp;
29668      *fusion_pri = tmp;
29669      return;
29670    }
29671
29672  /* Load goes first.  */
29673  if (is_load)
29674    *fusion_pri = tmp - 1;
29675  else
29676    *fusion_pri = tmp - 2;
29677
29678  tmp /= 2;
29679
29680  /* INSN with smaller base register goes first.  */
29681  tmp -= ((REGNO (base) & 0xff) << 20);
29682
29683  /* INSN with smaller offset goes first.  */
29684  off_val = (int)(INTVAL (offset));
29685  if (off_val >= 0)
29686    tmp -= (off_val & 0xfffff);
29687  else
29688    tmp += ((- off_val) & 0xfffff);
29689
29690  *pri = tmp;
29691  return;
29692}
29693#include "gt-arm.h"
29694