/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993-2015 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include <sstream>
#include <vector>

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "flags.h"
#include "hashtab.h"
#include "hard-reg-set.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "regs.h"
#include "output.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "intl.h"
#include "sched-int.h"
#include "params.h"
#include "ggc.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "alloc-pool.h"
#include "tm-constrs.h"
#include "opts.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
				 && ((HOST_WIDE_INT)(VALUE)) <= 511)

#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		    ? (DECL_ATTRIBUTES (decl)) \
		    : TYPE_ATTRIBUTES (TREE_TYPE (decl))

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;

/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

int assembler_dialect;
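
/* Note: assembler_dialect selects the assembler syntax variant used in
   output templates (through the ASSEMBLER_DIALECT macro).  sh_option_override
   sets it to 1 for SH4 / SH4A and leaves it 0 otherwise; for instance, the
   '.' code in sh_print_operand below uses it to choose between the ".s"
   and "/s" delay-slot annotations.  */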
static bool shmedia_space_reserved_for_target_registers;

static void split_branches (rtx_insn *);
static int branch_dest (rtx);
static void print_slot (rtx_sequence *);
static rtx_code_label *add_constant (rtx, machine_mode, rtx);
static void dump_table (rtx_insn *, rtx_insn *);
static bool broken_move (rtx_insn *);
static bool mova_p (rtx_insn *);
static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
static void sh_reorg (void);
static void sh_option_override (void);
static void sh_override_options_after_change (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
static rtx_insn *frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
static int sh_mode_needed (int, rtx_insn *);
static int sh_mode_after (int, int, rtx_insn *);
static int sh_mode_entry (int);
static int sh_mode_exit (int);
static int sh_mode_priority (int entity, int n);

static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
						   int, bool *);
static tree sh_handle_resbank_handler_attribute (tree *, tree,
						 tree, int, bool *);
static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
							   tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_print_operand (FILE *, rtx, int);
static void sh_print_operand_address (FILE *, rtx);
static bool sh_print_operand_punct_valid_p (unsigned char code);
static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, machine_mode);
static short find_insn_regmode_weight (rtx, machine_mode);
static void find_regmode_weight (basic_block, machine_mode);
static int find_r0_life_regions (basic_block);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx_insn **, int);
static void ready_reorder (rtx_insn **, int);
static bool high_pressure (machine_mode);
static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx_insn *, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
static reg_class_t sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);

static void sh_init_builtins (void);
static tree sh_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				HOST_WIDE_INT, tree);
static void sh_file_start (void);
static bool flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, const_rtx, void *);
static int shiftcosts (rtx);
static int and_xor_ior_costs (rtx, int);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx_insn *);
static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					struct secondary_reload_info *);
static bool sh_legitimate_address_p (machine_mode, rtx, bool);
static rtx sh_legitimize_address (rtx, rtx, machine_mode);
static rtx sh_delegitimize_address (rtx);
static bool sh_cannot_substitute_mem_equiv_p (rtx);
static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
static bool sh_function_value_regno_p (const unsigned int);
static rtx sh_libcall_value (machine_mode, const_rtx);
static bool sh_return_in_memory (const_tree, const_tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
				       tree, int *, int);
static bool sh_strict_argument_naming (cumulative_args_t);
static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
static tree sh_build_builtin_va_list (void);
static void sh_va_start (tree, rtx);
static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool sh_promote_prototypes (const_tree);
static machine_mode sh_promote_function_mode (const_tree type,
					      machine_mode,
					      int *punsignedp,
					      const_tree funtype,
					      int for_return);
static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static bool sh_callee_copies (cumulative_args_t, machine_mode,
			      const_tree, bool);
static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void sh_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx sh_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static bool sh_scalar_mode_supported_p (machine_mode);
static int sh_dwarf_calling_convention (const_tree);
static void sh_encode_section_info (tree, rtx, int);
static bool sh2a_function_vector_p (tree);
static void sh_trampoline_init (rtx, tree, rtx);
static rtx sh_trampoline_adjust_address (rtx);
static void sh_conditional_register_usage (void);
static bool sh_legitimate_constant_p (machine_mode, rtx);
static int mov_insn_size (machine_mode, bool);
static int mov_insn_alignment_mask (machine_mode, bool);
static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
					       unsigned int,
					       enum by_pieces_operation,
					       bool);
static bool sequence_insn_p (rtx_insn *);
static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
					machine_mode, bool);
static bool sh_legitimate_combined_insn (rtx_insn* insn);

static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);

static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;

static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "interrupt_handler", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "sp_switch", 1, 1, true, false, false,
    sh_handle_sp_switch_attribute, false },
  { "trap_exit", 1, 1, true, false, false,
    sh_handle_trap_exit_attribute, false },
  { "renesas", 0, 0, false, true, false,
    sh_handle_renesas_attribute, false },
  { "trapa_handler", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "nosave_low_regs", 0, 0, true, false, false,
    sh_handle_interrupt_handler_attribute, false },
  { "resbank", 0, 0, true, false, false,
    sh_handle_resbank_handler_attribute, false },
  { "function_vector", 1, 1, true, false, false,
    sh2a_handle_function_vector_handler_attribute, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE.
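   Only the SH5 (sh64-elf) assembler handles the 64-bit .quad / .uaquad
   directives properly, so sh_option_override clears the corresponding
   aligned_op.di / unaligned_op.di hooks on the other targets.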
   */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sh_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  sh_override_options_after_change

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sh_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sh_register_move_cost

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented to reenable sched1.  With the
   help of these macros we limit the movement of insns in sched1 to
   reduce register pressure.  The overall idea is to keep count of the
   SImode and SFmode regs required by already scheduled insns.  When these
   counts cross some threshold values, give priority to insns that free
   registers.  The insn that frees registers is most likely to be the
   insn with the lowest LUID (original insn order); but such an insn might
   sit in the stalled queue (Q) instead of the ready queue (R).  To solve
   this, we skip cycles, up to a maximum of 8, so that such insns may move
   from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after the
   find_insn_reg_weights function call.  It is used to calculate the
   SImode and SFmode weights of insns of basic blocks, much as
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   (Q)->(R).

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that
   it can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.
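
   Roughly: TARGET_SCHED_INIT_GLOBAL computes the per-insn SImode / SFmode
   weights once, TARGET_SCHED_INIT resets the pressure counters, and once
   CURR_REGMODE_PRESSURE (see below) crosses a threshold the reorder hooks
   favor low-LUID insns and have TARGET_SCHED_DFA_NEW_CYCLE skip up to 8
   cycles so that stalled insns can move from Q to R.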
   */

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sh_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sh_libcall_value
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sh_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT sh_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED sh_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER sh_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY sh_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT sh_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY sh_mode_priority

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) \
  regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) \
  curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info

#undef TARGET_LRA_P
#define TARGET_LRA_P sh_lra_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p

#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p

#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
  sh_legitimize_address_displacement

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  sh_use_by_pieces_infrastructure_p

/* Machine-specific symbol_ref flags.  */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)

/* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80.  This value
   is used by optabs.c atomic op expansion code as well as in sync.md.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80

struct gcc_target targetm = TARGET_INITIALIZER;


/* Information on the currently selected atomic model.
   This is initialized in sh_option_override.  */
static sh_atomic_model selected_atomic_model_;

const sh_atomic_model&
selected_atomic_model (void)
{
  return selected_atomic_model_;
}

static sh_atomic_model
parse_validate_atomic_model_option (const char* str)
{
  const char* model_names[sh_atomic_model::num_models];
  model_names[sh_atomic_model::none] = "none";
  model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
  model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
  model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
  model_names[sh_atomic_model::soft_imask] = "soft-imask";

  const char* model_cdef_names[sh_atomic_model::num_models];
  model_cdef_names[sh_atomic_model::none] = "NONE";
  model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
  model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
  model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
  model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";

  sh_atomic_model ret;
  ret.type = sh_atomic_model::none;
  ret.name = model_names[sh_atomic_model::none];
  ret.cdef_name = model_cdef_names[sh_atomic_model::none];
  ret.strict = false;
  ret.tcb_gbr_offset = -1;

  /* Handle empty string as 'none'.  */
  if (str == NULL || *str == '\0')
    return ret;

#define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)

  std::vector<std::string> tokens;
  for (std::stringstream ss (str); ss.good (); )
    {
      tokens.push_back (std::string ());
      std::getline (ss, tokens.back (), ',');
    }

  if (tokens.empty ())
    err_ret ("invalid atomic model option");

  /* The first token must be the atomic model name.  */
  {
    for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
      if (tokens.front () == model_names[i])
	{
	  ret.type = (sh_atomic_model::enum_type)i;
	  ret.name = model_names[i];
	  ret.cdef_name = model_cdef_names[i];
	  goto got_mode_name;
	}

    err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
got_mode_name:;
  }

  /* Go through the remaining tokens.  */
  for (size_t i = 1; i < tokens.size (); ++i)
    {
      if (tokens[i] == "strict")
	ret.strict = true;
      else if (tokens[i].find ("gbr-offset=") == 0)
	{
	  std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
	  ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
	  if (offset_str.empty () || ret.tcb_gbr_offset == -1)
	    err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
		     "option", offset_str.c_str ());
	}
      else
	err_ret ("unknown parameter \"%s\" in atomic model option",
		 tokens[i].c_str ());
    }

  /* Check that the selection makes sense.  */
  if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
    err_ret ("atomic operations are not supported on SHmedia");

  if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
    err_ret ("atomic model %s is only available on SH3 and SH4 targets",
	     ret.name);

  if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
    err_ret ("atomic model %s is only available on SH4A targets", ret.name);

  if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
    err_ret ("atomic model %s requires gbr-offset parameter", ret.name);

  if (ret.type == sh_atomic_model::soft_tcb
      && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
	  || (ret.tcb_gbr_offset & 3) != 0))
    err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
	     "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
	     ret.name);

  if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
    err_ret ("cannot use atomic model %s in user mode", ret.name);

  return ret;

#undef err_ret
}

/* Register SH specific RTL passes.  */
extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
					    const char* name);
extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
						  const char* name);
static void
register_sh_passes (void)
{
  if (!TARGET_SH1)
    return;

/* Running the sh_treg_combine pass after ce1 generates better code when
   comparisons are combined and reg-reg moves are introduced, because
   reg-reg moves will be eliminated afterwards.  However, there are quite
   a few cases where combine will be unable to fold comparison related insns,
   thus for now don't do it.
  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
		 PASS_POS_INSERT_AFTER, "ce1", 1);
*/

  /* Run sh_treg_combine pass after combine but before register allocation.
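     ("split1" itself runs after combine and before register allocation,
     which is why the pass is inserted there.)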
     */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
		 PASS_POS_INSERT_AFTER, "split1", 1);

  /* Run sh_treg_combine pass after register allocation and basic block
     reordering as this sometimes creates new opportunities.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
		 PASS_POS_INSERT_AFTER, "split4", 1);

  /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
     is known after a conditional branch.
     This must be done after basic blocks and branch conditions have
     stabilized and won't be changed by further passes.  */
  register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
		 PASS_POS_INSERT_BEFORE, "sched2", 1);
}

/* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override
   various options, and do some machine dependent initialization.  */
static void
sh_option_override (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;
  if (optimize > 1 && !optimize_size)
    target_flags |= MASK_SAVE_ALL_TARGET_REGS;

  /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T.  */
  TARGET_CBRANCHDI4 = 1;
  TARGET_CMPEQDI_T = 0;

  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }
  if (TARGET_SH5)
    {
      sh_cpu = PROCESSOR_SH5;
      target_flags |= MASK_ALIGN_DOUBLE;
      if (TARGET_SHMEDIA_FPU)
	target_flags |= MASK_FMOVD;
      if (TARGET_SHMEDIA)
	{
	  /* There are no delay slots on SHmedia.  */
	  flag_delayed_branch = 0;
	  /* Relaxation isn't yet supported for SHmedia.  */
	  target_flags &= ~MASK_RELAX;
	  /* After reload, if-conversion does little good but can cause
	     ICEs:
	     - find_if_block doesn't do anything for SH because we don't
	       have conditional execution patterns.  (We use conditional
	       move patterns, which are handled differently, and only
	       before reload).
	     - find_cond_trap doesn't do anything for the SH because we
	       don't have conditional traps.
	     - find_if_case_1 uses redirect_edge_and_branch_force in
	       the only path that does an optimization, and this causes
	       an ICE when branch targets are in registers.
	     - find_if_case_2 doesn't do anything for the SHmedia after
	       reload except when it can redirect a tablejump - and
	       that's rather rare.  */
	  flag_if_conversion2 = 0;
	  if (! strcmp (sh_div_str, "call"))
	    sh_div_strategy = SH_DIV_CALL;
	  else if (! strcmp (sh_div_str, "call2"))
	    sh_div_strategy = SH_DIV_CALL2;
	  else if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
	    sh_div_strategy = SH_DIV_FP;
	  else if (! strcmp (sh_div_str, "inv"))
	    sh_div_strategy = SH_DIV_INV;
	  else if (! strcmp (sh_div_str, "inv:minlat"))
	    sh_div_strategy = SH_DIV_INV_MINLAT;
	  else if (! strcmp (sh_div_str, "inv20u"))
	    sh_div_strategy = SH_DIV_INV20U;
	  else if (! strcmp (sh_div_str, "inv20l"))
	    sh_div_strategy = SH_DIV_INV20L;
	  else if (! strcmp (sh_div_str, "inv:call2"))
	    sh_div_strategy = SH_DIV_INV_CALL2;
	  else if (! strcmp (sh_div_str, "inv:call"))
	    sh_div_strategy = SH_DIV_INV_CALL;
	  else if (! strcmp (sh_div_str, "inv:fp"))
	    {
	      if (TARGET_FPU_ANY)
		sh_div_strategy = SH_DIV_INV_FP;
	      else
		sh_div_strategy = SH_DIV_INV;
	    }
	  TARGET_CBRANCHDI4 = 0;
	  /* Assembler CFI isn't yet fully supported for SHmedia.  */
	  flag_dwarf2_cfi_asm = 0;
	}
    }
  else
    {
      /* Only the sh64-elf assembler fully supports .quad properly.  */
      targetm.asm_out.aligned_op.di = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
     Disable it for everything else.  */
  if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
    TARGET_USERMODE = false;

  if (TARGET_SH1)
    {
      if (! strcmp (sh_div_str, "call-div1"))
	sh_div_strategy = SH_DIV_CALL_DIV1;
      else if (! strcmp (sh_div_str, "call-fp")
	       && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
		   || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
	sh_div_strategy = SH_DIV_CALL_FP;
      else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      else
	/* Pick one that makes most sense for the target in general.
	   It is not much good to use different functions depending
	   on -Os, since then we'll end up with two different functions
	   when some of the code is compiled for size, and some for
	   speed.  */

	/* SH4 tends to emphasize speed.  */
	if (TARGET_HARD_SH4)
	  sh_div_strategy = SH_DIV_CALL_TABLE;
	/* These have their own way of doing things.  */
	else if (TARGET_SH2A)
	  sh_div_strategy = SH_DIV_INTRINSIC;
	/* ??? Should we use the integer SHmedia function instead?  */
	else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
	  sh_div_strategy = SH_DIV_CALL_FP;
	/* SH1 .. SH3 cores often go into small-footprint systems, so
	   default to the smallest implementation available.  */
	else
	  sh_div_strategy = SH_DIV_CALL_DIV1;
    }
  if (!TARGET_SH1)
    TARGET_PRETEND_CMOVE = 0;
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else if (TARGET_SH5)
    sh_divsi3_libfunc = "__sdivsi3_1";
  else
    sh_divsi3_libfunc = "__sdivsi3";

  if (sh_branch_cost == -1)
    {
      /* The SH1 does not have delay slots, hence we get a pipeline stall
	 at every branch.  The SH4 is superscalar, so the single delay slot
	 is not sufficient to keep both pipelines filled.
	 In any case, set the default branch cost to '2', as it results in
	 slightly overall smaller code and also enables some if conversions
	 that are required for matching special T bit related insns.  */
      sh_branch_cost = 2;
    }

  /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user.  */
  if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
    TARGET_ZDCBRANCH = 1;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  if ((flag_pic && ! TARGET_PREFERGOT)
      || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests it.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  This causes high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
	    warning (0, "ignoring -fschedule-insns because of exception "
			"handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns
	       && !global_options_set.x_flag_schedule_insns)
	flag_schedule_insns = 0;
    }

  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
    {
      warning (0, "unwind tables currently require either a frame pointer "
		  "or -maccumulate-outgoing-args for correctness");
      TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
    }

  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align = align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }

  if (flag_unsafe_math_optimizations)
    {
      /* Enable fsca insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
	TARGET_FSCA = 1;

      /* Enable fsrra insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
	TARGET_FSRRA = 1;
    }

  /* Allow fsrra insn only if -funsafe-math-optimizations and
     -ffinite-math-only is enabled.  */
  TARGET_FSRRA = TARGET_FSRRA
		 && flag_unsafe_math_optimizations
		 && flag_finite_math_only;

  /* If the -mieee option was not explicitly set by the user, turn it on
     unless -ffinite-math-only was specified.  See also PR 33135.  */
  if (! global_options_set.x_TARGET_IEEE)
    TARGET_IEEE = ! flag_finite_math_only;

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.
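     With strict volatile bitfields, a volatile bit-field access is done
     with the width of the field's declared container type, which is what
     memory-mapped hardware registers typically require.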
     */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  sh_override_options_after_change ();

  /* Parse atomic model option and make sure it is valid for the current
     target CPU.  */
  selected_atomic_model_
    = parse_validate_atomic_model_option (sh_atomic_model_str);

  register_sh_passes ();
}

/* Implement targetm.override_options_after_change.  */

static void
sh_override_options_after_change (void)
{
  /* Adjust loop, jump and function alignment values (in bytes), if those
     were not specified by the user using -falign-loops, -falign-jumps
     and -falign-functions options.
     32 bit alignment is better for speed, because instructions can be
     fetched as a pair from a longword boundary.  For size use 16 bit
     alignment to get more compact code.
     Aligning all jumps increases the code size, even if it might
     result in slightly faster code.  Thus, it is set to the smallest
     alignment possible if not specified by the user.  */
  if (align_loops == 0)
    {
      if (TARGET_SH5)
	align_loops = 8;
      else
	align_loops = optimize_size ? 2 : 4;
    }

  if (align_jumps == 0)
    {
      if (TARGET_SHMEDIA)
	align_jumps = 1 << CACHE_LOG;
      else
	align_jumps = 2;
    }
  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
    align_jumps = TARGET_SHMEDIA ? 4 : 2;

  if (align_functions == 0)
    {
      if (TARGET_SHMEDIA)
	align_functions = optimize_size
			  ? FUNCTION_BOUNDARY / 8 : (1 << CACHE_LOG);
      else
	align_functions = optimize_size ? 2 : 4;
    }

  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      int min_align = align_loops > align_jumps ? align_loops : align_jumps;

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions < min_align)
	align_functions = min_align;
    }
}

/* Print the operand address in x to the stream.  */
static void
sh_print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      fprintf (stream, "@(r0,%s)",
		       reg_names[MAX (base_num, index_num)]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  SHMEDIA: print an `x' if `m' will print `base,index'.
	otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
	  && get_attr_length (final_sequence->insn (1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    /* N.B.: %R / %S / %T adjust memory addresses by four.
       For SHMEDIA, that means they can be used to access the first and
       second 32 bit part of a 64 bit (or larger) value that
       might be held in floating point registers or memory.
       While they can be used to access 64 bit parts of a larger value
       held in general purpose registers, that won't work with memory -
       neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
	  sh_print_operand_address (stream, XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  sh_print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (TARGET_SHMEDIA)
	{
	  if (MEM_P (x)
	      && GET_CODE (XEXP (x, 0)) == PLUS
	      && (REG_P (XEXP (XEXP (x, 0), 1))
		  || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	    fputc ('x', stream);
	}
      else
	{
	  if (MEM_P (x))
	    {
	      switch (GET_MODE (x))
		{
		case QImode: fputs (".b", stream); break;
		case HImode: fputs (".w", stream); break;
		case SImode: fputs (".l", stream); break;
		case SFmode: fputs (".s", stream); break;
		case DFmode: fputs (".d", stream); break;
		default: gcc_unreachable ();
		}
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.
	 */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  sh_print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  sh_print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (HARD_REGNO_NREGS (regno, inner_mode)
		     - HARD_REGNO_NREGS (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case IF_THEN_ELSE:
	  gcc_assert (trapping_target_operand (x, VOIDmode));
	  x = XEXP (XEXP (x, 2), 0);
	  goto default_output;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.
	     */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	default:
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}

static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '.' || code == '#' || code == '@' || code == ','
	  || code == '$' || code == '\'' || code == '>');
}

/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
static bool
sh_asm_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    {
      switch (XINT (x, 1))
	{
	case UNSPEC_DATALABEL:
	  fputs ("datalabel ", file);
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  break;
	case UNSPEC_PIC:
	  /* GLOBAL_OFFSET_TABLE or local symbols, no suffix.  */
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  break;
	case UNSPEC_GOT:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLT:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@PLT", file);
	  break;
	case UNSPEC_GOTPLT:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@GOTPLT", file);
	  break;
	case UNSPEC_DTPOFF:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  output_addr_const (file, XVECEXP (x, 0, 0));
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_CALLER:
	  {
	    char name[32];
	    /* LPCS stands for Label for PIC Call Site.
*/ 1669 targetm.asm_out.generate_internal_label (name, "LPCS", 1670 INTVAL (XVECEXP (x, 0, 0))); 1671 assemble_name (file, name); 1672 } 1673 break; 1674 case UNSPEC_EXTRACT_S16: 1675 case UNSPEC_EXTRACT_U16: 1676 { 1677 rtx val, shift; 1678 1679 val = XVECEXP (x, 0, 0); 1680 shift = XVECEXP (x, 0, 1); 1681 fputc ('(', file); 1682 if (shift != const0_rtx) 1683 fputc ('(', file); 1684 if (GET_CODE (val) == CONST 1685 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ) 1686 { 1687 fputc ('(', file); 1688 output_addr_const (file, val); 1689 fputc (')', file); 1690 } 1691 else 1692 output_addr_const (file, val); 1693 if (shift != const0_rtx) 1694 { 1695 fputs (" >> ", file); 1696 output_addr_const (file, shift); 1697 fputc (')', file); 1698 } 1699 fputs (" & 65535)", file); 1700 } 1701 break; 1702 case UNSPEC_SYMOFF: 1703 output_addr_const (file, XVECEXP (x, 0, 0)); 1704 fputc ('-', file); 1705 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST) 1706 { 1707 fputc ('(', file); 1708 output_addr_const (file, XVECEXP (x, 0, 1)); 1709 fputc (')', file); 1710 } 1711 else 1712 output_addr_const (file, XVECEXP (x, 0, 1)); 1713 break; 1714 case UNSPEC_PCREL_SYMOFF: 1715 output_addr_const (file, XVECEXP (x, 0, 0)); 1716 fputs ("-(", file); 1717 output_addr_const (file, XVECEXP (x, 0, 1)); 1718 fputs ("-.)", file); 1719 break; 1720 default: 1721 return false; 1722 } 1723 return true; 1724 } 1725 else 1726 return false; 1727} 1728 1729/* Encode symbol attributes of a SYMBOL_REF into its 1730 SYMBOL_REF_FLAGS. */ 1731static void 1732sh_encode_section_info (tree decl, rtx rtl, int first) 1733{ 1734 default_encode_section_info (decl, rtl, first); 1735 1736 if (TREE_CODE (decl) == FUNCTION_DECL 1737 && sh2a_function_vector_p (decl) && TARGET_SH2A) 1738 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; 1739} 1740 1741/* Prepare operands for a move define_expand; specifically, one of the 1742 operands must be in a register. */ 1743void 1744prepare_move_operands (rtx operands[], machine_mode mode) 1745{ 1746 if ((mode == SImode || mode == DImode) 1747 && flag_pic 1748 && ! ((mode == Pmode || mode == ptr_mode) 1749 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE)) 1750 { 1751 rtx temp; 1752 if (SYMBOLIC_CONST_P (operands[1])) 1753 { 1754 if (MEM_P (operands[0])) 1755 operands[1] = force_reg (Pmode, operands[1]); 1756 else if (TARGET_SHMEDIA 1757 && GET_CODE (operands[1]) == LABEL_REF 1758 && target_reg_operand (operands[0], mode)) 1759 /* It's ok. */; 1760 else 1761 { 1762 temp = (!can_create_pseudo_p () 1763 ? operands[0] 1764 : gen_reg_rtx (Pmode)); 1765 operands[1] = legitimize_pic_address (operands[1], mode, temp); 1766 } 1767 } 1768 else if (GET_CODE (operands[1]) == CONST 1769 && GET_CODE (XEXP (operands[1], 0)) == PLUS 1770 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0))) 1771 { 1772 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); 1773 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0), 1774 mode, temp); 1775 operands[1] = expand_binop (mode, add_optab, temp, 1776 XEXP (XEXP (operands[1], 0), 1), 1777 (!can_create_pseudo_p () 1778 ? temp 1779 : gen_reg_rtx (Pmode)), 1780 0, OPTAB_LIB_WIDEN); 1781 } 1782 } 1783 1784 if (! reload_in_progress && ! reload_completed) 1785 { 1786 /* Copy the source to a register if both operands aren't registers. */ 1787 if (! register_operand (operands[0], mode) 1788 && ! sh_register_operand (operands[1], mode)) 1789 operands[1] = copy_to_mode_reg (mode, operands[1]); 1790 1791 if (MEM_P (operands[0]) && ! 
memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (TARGET_SH1
	       && refers_to_regno_p (R0_REG, operands[1])
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* When the displacement addressing is used, RA will assign r0 to
	 the pseudo register operand for the QI/HImode load/store.
	 This tends to make a long live range for R0 and might cause
	 anomalous register spills in some cases with LRA.  See PR
	 target/55212.
	 We split possible load/store to two move insns via r0 so as to
	 shorten R0 live range.  It will make some code worse but will
	 win on average for LRA.  */
      else if (sh_lra_p ()
	       && TARGET_SH1 && ! TARGET_SH2A
	       && (mode == QImode || mode == HImode)
	       && ((REG_P (operands[0]) && MEM_P (operands[1]))
		   || (REG_P (operands[1]) && MEM_P (operands[0]))))
	{
	  bool load_p = REG_P (operands[0]);
	  rtx reg = operands[load_p ? 0 : 1];
	  rtx adr = XEXP (operands[load_p ? 1 : 0], 0);

	  if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
	      && GET_CODE (adr) == PLUS
	      && REG_P (XEXP (adr, 0))
	      && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
	      && CONST_INT_P (XEXP (adr, 1))
	      && INTVAL (XEXP (adr, 1)) != 0
	      && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
	    {
	      rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
	      emit_move_insn (r0_rtx, operands[1]);
	      operands[1] = r0_rtx;
	    }
	}
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      if (! reload_in_progress && ! reload_completed
	  && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  if (! flag_pic
	      && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
		  || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
		  || tls_kind == TLS_MODEL_INITIAL_EXEC))
	    {
	      static int got_labelno;
	      /* Don't schedule insns for getting GOT address when
		 the first scheduling is enabled, to avoid spill
		 failures for R0.
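		 The gen_blockage () calls below act as scheduling
		 barriers on both sides of the GOT load, so the first
		 scheduling pass cannot pull unrelated insns into that
		 region.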
*/ 1876 if (flag_schedule_insns) 1877 emit_insn (gen_blockage ()); 1878 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno))); 1879 emit_use (gen_rtx_REG (SImode, PIC_REG)); 1880 if (flag_schedule_insns) 1881 emit_insn (gen_blockage ()); 1882 } 1883 1884 switch (tls_kind) 1885 { 1886 case TLS_MODEL_GLOBAL_DYNAMIC: 1887 tga_ret = gen_rtx_REG (Pmode, R0_REG); 1888 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1)); 1889 tmp = gen_reg_rtx (Pmode); 1890 emit_move_insn (tmp, tga_ret); 1891 op1 = tmp; 1892 break; 1893 1894 case TLS_MODEL_LOCAL_DYNAMIC: 1895 tga_ret = gen_rtx_REG (Pmode, R0_REG); 1896 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1)); 1897 1898 tmp = gen_reg_rtx (Pmode); 1899 emit_move_insn (tmp, tga_ret); 1900 1901 if (register_operand (op0, Pmode)) 1902 tmp2 = op0; 1903 else 1904 tmp2 = gen_reg_rtx (Pmode); 1905 1906 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp)); 1907 op1 = tmp2; 1908 break; 1909 1910 case TLS_MODEL_INITIAL_EXEC: 1911 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode); 1912 tmp = gen_sym2GOTTPOFF (op1); 1913 emit_insn (gen_tls_initial_exec (tga_op1, tmp)); 1914 op1 = tga_op1; 1915 break; 1916 1917 case TLS_MODEL_LOCAL_EXEC: 1918 tmp2 = gen_reg_rtx (Pmode); 1919 emit_insn (gen_store_gbr (tmp2)); 1920 tmp = gen_reg_rtx (Pmode); 1921 emit_insn (gen_symTPOFF2reg (tmp, op1)); 1922 1923 if (register_operand (op0, Pmode)) 1924 op1 = op0; 1925 else 1926 op1 = gen_reg_rtx (Pmode); 1927 1928 emit_insn (gen_addsi3 (op1, tmp, tmp2)); 1929 break; 1930 1931 default: 1932 gcc_unreachable (); 1933 } 1934 if (opc) 1935 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc))); 1936 operands[1] = op1; 1937 } 1938 } 1939} 1940 1941/* Implement the canonicalize_comparison target hook for the combine 1942 pass. For the target hook this function is invoked via 1943 sh_canonicalize_comparison. This function is also re-used to 1944 canonicalize comparisons in cbranch pattern expanders. */ 1945static void 1946sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, 1947 machine_mode mode, 1948 bool op0_preserve_value) 1949{ 1950 /* When invoked from within the combine pass the mode is not specified, 1951 so try to get it from one of the operands. */ 1952 if (mode == VOIDmode) 1953 mode = GET_MODE (op0); 1954 if (mode == VOIDmode) 1955 mode = GET_MODE (op1); 1956 1957 // We need to have a mode to do something useful here. 1958 if (mode == VOIDmode) 1959 return; 1960 1961 // Currently, we don't deal with floats here. 1962 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 1963 return; 1964 1965 // Make sure that the constant operand is the second operand. 1966 if (CONST_INT_P (op0) && !CONST_INT_P (op1)) 1967 { 1968 if (op0_preserve_value) 1969 return; 1970 1971 std::swap (op0, op1); 1972 cmp = swap_condition (cmp); 1973 } 1974 1975 if (CONST_INT_P (op1)) 1976 { 1977 /* Try to adjust the constant operand in such a way that available 1978 comparison insns can be utilized better and the constant can be 1979 loaded with a 'mov #imm,Rm' insn. This avoids a load from the 1980 constant pool. */ 1981 const HOST_WIDE_INT val = INTVAL (op1); 1982 1983 /* x > -1 --> x >= 0 1984 x > 0xFFFFFF7F --> x >= 0xFFFFFF80 1985 x <= -1 --> x < 0 1986 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */ 1987 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE)) 1988 { 1989 cmp = cmp == GT ? 
GE : LT; 1990 op1 = gen_int_mode (val + 1, mode); 1991 } 1992 1993 /* x >= 1 --> x > 0 1994 x >= 0x80 --> x > 0x7F 1995 x < 1 --> x <= 0 1996 x < 0x80 --> x <= 0x7F */ 1997 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT)) 1998 { 1999 cmp = cmp == GE ? GT : LE; 2000 op1 = gen_int_mode (val - 1, mode); 2001 } 2002 2003 /* unsigned x >= 1 --> x != 0 2004 unsigned x < 1 --> x == 0 */ 2005 else if (val == 1 && (cmp == GEU || cmp == LTU)) 2006 { 2007 cmp = cmp == GEU ? NE : EQ; 2008 op1 = CONST0_RTX (mode); 2009 } 2010 2011 /* unsigned x >= 0x80 --> unsigned x > 0x7F 2012 unsigned x < 0x80 --> unsigned x < 0x7F */ 2013 else if (val == 0x80 && (cmp == GEU || cmp == LTU)) 2014 { 2015 cmp = cmp == GEU ? GTU : LEU; 2016 op1 = gen_int_mode (val - 1, mode); 2017 } 2018 2019 /* unsigned x > 0 --> x != 0 2020 unsigned x <= 0 --> x == 0 */ 2021 else if (val == 0 && (cmp == GTU || cmp == LEU)) 2022 cmp = cmp == GTU ? NE : EQ; 2023 2024 /* unsigned x > 0x7FFFFFFF --> signed x < 0 2025 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */ 2026 else if (mode == SImode && (cmp == GTU || cmp == LEU) 2027 && val == 0x7FFFFFFF) 2028 { 2029 cmp = cmp == GTU ? LT : GE; 2030 op1 = const0_rtx; 2031 } 2032 2033 /* unsigned x >= 0x80000000 --> signed x < 0 2034 unsigned x < 0x80000000 --> signed x >= 0 */ 2035 else if (mode == SImode && (cmp == GEU || cmp == LTU) 2036 && (unsigned HOST_WIDE_INT)val 2037 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1)) 2038 { 2039 cmp = cmp == GEU ? LT : GE; 2040 op1 = const0_rtx; 2041 } 2042 } 2043} 2044 2045/* This function implements the canonicalize_comparison target hook. 2046 This wrapper around the internally used sh_canonicalize_comparison 2047 function is needed to do the enum rtx_code <-> int conversion. 2048 Target hooks cannot use enum rtx_code in its definition. */ 2049static void 2050sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1, 2051 bool op0_preserve_value) 2052{ 2053 enum rtx_code tmp_code = (enum rtx_code)*code; 2054 sh_canonicalize_comparison (tmp_code, *op0, *op1, 2055 VOIDmode, op0_preserve_value); 2056 *code = (int)tmp_code; 2057} 2058 2059/* This function implements the legitimate_combined_insn target hook, 2060 which the combine pass uses to early reject combined insns, before 2061 it tries to recog the insn and determine its cost. */ 2062static bool 2063sh_legitimate_combined_insn (rtx_insn* insn) 2064{ 2065 /* Reject combinations of memory loads and zero extensions, as these 2066 interfere with other combine patterns such as zero extracts and bit 2067 tests. The SH2A movu.{b|w} insns are formed later in the 2068 'sh_optimize_extu_exts' pass after combine/split1. */ 2069 rtx p = PATTERN (insn); 2070 if (GET_CODE (p) == SET 2071 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode 2072 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND 2073 && MEM_P (XEXP (XEXP (p, 1), 0))) 2074 return false; 2075 2076 return true; 2077} 2078 2079bool 2080sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2) 2081{ 2082 *p1 = T_REG; 2083 *p2 = INVALID_REGNUM; 2084 return true; 2085} 2086 2087enum rtx_code 2088prepare_cbranch_operands (rtx *operands, machine_mode mode, 2089 enum rtx_code comparison) 2090{ 2091 /* The scratch reg is only available when this is invoked from within 2092 the cbranchdi4_i splitter, through expand_cbranchdi4. 
*/ 2093 rtx scratch = NULL_RTX; 2094 2095 if (comparison == LAST_AND_UNUSED_RTX_CODE) 2096 comparison = GET_CODE (operands[0]); 2097 else 2098 scratch = operands[4]; 2099 2100 sh_canonicalize_comparison (comparison, operands[1], operands[2], 2101 mode, false); 2102 2103 /* Notice that this function is also invoked after reload by 2104 the cbranchdi4_i pattern, through expand_cbranchdi4. */ 2105 rtx op1 = operands[1]; 2106 2107 if (can_create_pseudo_p ()) 2108 operands[1] = force_reg (mode, op1); 2109 /* When we are handling DImode comparisons, we want to keep constants so 2110 that we can optimize the component comparisons; however, memory loads 2111 are better issued as a whole so that they can be scheduled well. 2112 SImode equality comparisons allow I08 constants, but only when they 2113 compare r0. Hence, if operands[1] has to be loaded from somewhere else 2114 into a register, that register might as well be r0, and we allow the 2115 constant. If it is already in a register, this is likely to be 2116 allocated to a different hard register, thus we load the constant into 2117 a register unless it is zero. */ 2118 if (!REG_P (operands[2]) 2119 && (!CONST_INT_P (operands[2]) 2120 || (mode == SImode && operands[2] != CONST0_RTX (SImode) 2121 && ((comparison != EQ && comparison != NE) 2122 || (REG_P (op1) && REGNO (op1) != R0_REG) 2123 || !satisfies_constraint_I08 (operands[2]))))) 2124 { 2125 if (scratch && GET_MODE (scratch) == mode) 2126 { 2127 emit_move_insn (scratch, operands[2]); 2128 operands[2] = scratch; 2129 } 2130 else if (can_create_pseudo_p ()) 2131 operands[2] = force_reg (mode, operands[2]); 2132 } 2133 return comparison; 2134} 2135 2136void 2137expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability) 2138{ 2139 rtx (*branch_expander) (rtx) = gen_branch_true; 2140 comparison = prepare_cbranch_operands (operands, SImode, comparison); 2141 switch (comparison) 2142 { 2143 case NE: case LT: case LE: case LTU: case LEU: 2144 comparison = reverse_condition (comparison); 2145 branch_expander = gen_branch_false; 2146 default: ; 2147 } 2148 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), 2149 gen_rtx_fmt_ee (comparison, SImode, 2150 operands[1], operands[2]))); 2151 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3])); 2152 if (probability >= 0) 2153 add_int_reg_note (jump, REG_BR_PROB, probability); 2154} 2155 2156/* ??? How should we distribute probabilities when more than one branch 2157 is generated. So far we only have some ad-hoc observations: 2158 - If the operands are random, they are likely to differ in both parts. 2159 - If comparing items in a hash chain, the operands are random or equal; 2160 operation should be EQ or NE. 2161 - If items are searched in an ordered tree from the root, we can expect 2162 the highpart to be unequal about half of the time; operation should be 2163 an inequality comparison, operands non-constant, and overall probability 2164 about 50%. Likewise for quicksort. 2165 - Range checks will be often made against constants. Even if we assume for 2166 simplicity an even distribution of the non-constant operand over a 2167 sub-range here, the same probability could be generated with differently 2168 wide sub-ranges - as long as the ratio of the part of the subrange that 2169 is before the threshold to the part that comes after the threshold stays 2170 the same. Thus, we can't really tell anything here; 2171 assuming random distribution is at least simple. 
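   As a rough illustration of the splitting done in expand_cbranchdi4
   below (assuming REG_BR_PROB_BASE == 10000): an incoming branch
   probability of 5000 (50%, inside the 3/8 .. 5/8 window) on a
   non-constant DImode comparison is split as
     msw_taken_prob = 5000 / 2                 = 2500
     msw_skip_prob  = 10000 * 5000 / 15000     = 3333
     lsw_taken_prob = 5000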
2172 */ 2173bool 2174expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) 2175{ 2176 enum rtx_code msw_taken, msw_skip, lsw_taken; 2177 rtx_code_label *skip_label = NULL; 2178 rtx op1h, op1l, op2h, op2l; 2179 int num_branches; 2180 int prob, rev_prob; 2181 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1; 2182 rtx scratch = operands[4]; 2183 2184 comparison = prepare_cbranch_operands (operands, DImode, comparison); 2185 op1h = gen_highpart_mode (SImode, DImode, operands[1]); 2186 op2h = gen_highpart_mode (SImode, DImode, operands[2]); 2187 op1l = gen_lowpart (SImode, operands[1]); 2188 op2l = gen_lowpart (SImode, operands[2]); 2189 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE; 2190 prob = split_branch_probability; 2191 rev_prob = REG_BR_PROB_BASE - prob; 2192 switch (comparison) 2193 { 2194 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons? 2195 That costs 1 cycle more when the first branch can be predicted taken, 2196 but saves us mispredicts because only one branch needs prediction. 2197 It also enables generating the cmpeqdi_t-1 pattern. */ 2198 case EQ: 2199 if (TARGET_CMPEQDI_T) 2200 { 2201 emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); 2202 emit_jump_insn (gen_branch_true (operands[3])); 2203 return true; 2204 } 2205 msw_skip = NE; 2206 lsw_taken = EQ; 2207 if (prob >= 0) 2208 { 2209 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) . 2210 msw_skip_prob = rev_prob; 2211 if (REG_BR_PROB_BASE <= 65535) 2212 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0; 2213 else 2214 { 2215 lsw_taken_prob 2216 = (prob 2217 ? (REG_BR_PROB_BASE 2218 - ((gcov_type) REG_BR_PROB_BASE * rev_prob 2219 / ((gcov_type) prob << 32))) 2220 : 0); 2221 } 2222 } 2223 break; 2224 case NE: 2225 if (TARGET_CMPEQDI_T) 2226 { 2227 emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); 2228 emit_jump_insn (gen_branch_false (operands[3])); 2229 return true; 2230 } 2231 msw_taken = NE; 2232 msw_taken_prob = prob; 2233 lsw_taken = NE; 2234 lsw_taken_prob = 0; 2235 break; 2236 case GTU: case GT: 2237 msw_taken = comparison; 2238 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) 2239 break; 2240 if (comparison != GTU || op2h != CONST0_RTX (SImode)) 2241 msw_skip = swap_condition (msw_taken); 2242 lsw_taken = GTU; 2243 break; 2244 case GEU: case GE: 2245 if (op2l == CONST0_RTX (SImode)) 2246 msw_taken = comparison; 2247 else 2248 { 2249 msw_taken = comparison == GE ? 
GT : GTU; 2250 msw_skip = swap_condition (msw_taken); 2251 lsw_taken = GEU; 2252 } 2253 break; 2254 case LTU: case LT: 2255 msw_taken = comparison; 2256 if (op2l == CONST0_RTX (SImode)) 2257 break; 2258 msw_skip = swap_condition (msw_taken); 2259 lsw_taken = LTU; 2260 break; 2261 case LEU: case LE: 2262 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) 2263 msw_taken = comparison; 2264 else 2265 { 2266 lsw_taken = LEU; 2267 if (comparison == LE) 2268 msw_taken = LT; 2269 else if (op2h != CONST0_RTX (SImode)) 2270 msw_taken = LTU; 2271 else 2272 { 2273 msw_skip = swap_condition (LTU); 2274 break; 2275 } 2276 msw_skip = swap_condition (msw_taken); 2277 } 2278 break; 2279 default: return false; 2280 } 2281 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE) 2282 + (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2283 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE)); 2284 if (comparison != EQ && comparison != NE && num_branches > 1) 2285 { 2286 if (!CONSTANT_P (operands[2]) 2287 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U) 2288 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U)) 2289 { 2290 msw_taken_prob = prob / 2U; 2291 msw_skip_prob 2292 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob); 2293 lsw_taken_prob = prob; 2294 } 2295 else 2296 { 2297 msw_taken_prob = prob; 2298 msw_skip_prob = REG_BR_PROB_BASE; 2299 /* ??? If we have a constant op2h, should we use that when 2300 calculating lsw_taken_prob? */ 2301 lsw_taken_prob = prob; 2302 } 2303 } 2304 operands[1] = op1h; 2305 operands[2] = op2h; 2306 operands[4] = NULL_RTX; 2307 if (reload_completed 2308 && ! arith_reg_or_0_operand (op2h, SImode) 2309 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)) 2310 && (msw_taken != LAST_AND_UNUSED_RTX_CODE 2311 || msw_skip != LAST_AND_UNUSED_RTX_CODE)) 2312 { 2313 emit_move_insn (scratch, operands[2]); 2314 operands[2] = scratch; 2315 } 2316 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 2317 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); 2318 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2319 { 2320 rtx taken_label = operands[3]; 2321 2322 /* Operands were possibly modified, but msw_skip doesn't expect this. 2323 Always use the original ones. */ 2324 if (msw_taken != LAST_AND_UNUSED_RTX_CODE) 2325 { 2326 operands[1] = op1h; 2327 operands[2] = op2h; 2328 if (reload_completed 2329 && ! arith_reg_or_0_operand (op2h, SImode) 2330 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))) 2331 { 2332 emit_move_insn (scratch, operands[2]); 2333 operands[2] = scratch; 2334 } 2335 } 2336 2337 operands[3] = skip_label = gen_label_rtx (); 2338 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); 2339 operands[3] = taken_label; 2340 } 2341 operands[1] = op1l; 2342 operands[2] = op2l; 2343 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) 2344 { 2345 if (reload_completed 2346 && ! arith_reg_or_0_operand (op2l, SImode) 2347 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE))) 2348 { 2349 emit_move_insn (scratch, operands[2]); 2350 operands[2] = scratch; 2351 } 2352 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); 2353 } 2354 if (msw_skip != LAST_AND_UNUSED_RTX_CODE) 2355 emit_label (skip_label); 2356 return true; 2357} 2358 2359/* Given an operand, return 1 if the evaluated operand plugged into an 2360 if_then_else will result in a branch_true, 0 if branch_false, or 2361 -1 if neither nor applies. 
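   For example, an OP of (eq (reg:SI T_REG) (const_int 1)) evaluates
   to 1 (branch_true), while (eq (xor (reg:SI T_REG) (const_int 1))
   (const_int 1)) evaluates to 0 (branch_false).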
The truth table goes like this: 2362 2363 op | cmpval | code | result 2364 ---------+--------+---------+-------------------- 2365 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1) 2366 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1) 2367 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0) 2368 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0) 2369 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1) 2370 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1) 2371 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0) 2372 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */ 2373int 2374sh_eval_treg_value (rtx op) 2375{ 2376 if (t_reg_operand (op, GET_MODE (op))) 2377 return 1; 2378 if (negt_reg_operand (op, GET_MODE (op))) 2379 return 0; 2380 2381 rtx_code code = GET_CODE (op); 2382 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1))) 2383 return -1; 2384 2385 int cmpop = code == EQ ? 1 : 0; 2386 int cmpval = INTVAL (XEXP (op, 1)); 2387 if (cmpval != 0 && cmpval != 1) 2388 return -1; 2389 2390 int t; 2391 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) 2392 t = 0; 2393 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) 2394 t = 1; 2395 else 2396 return -1; 2397 2398 return t ^ (cmpval == cmpop); 2399} 2400 2401/* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case 2402 of floating-point comparisons. */ 2403static void 2404sh_emit_set_t_insn (rtx insn, machine_mode mode) 2405{ 2406 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT 2407 && GET_CODE (insn) != PARALLEL) 2408 { 2409 insn = gen_rtx_PARALLEL (VOIDmode, 2410 gen_rtvec (3, insn, 2411 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)), 2412 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG)))); 2413 } 2414 emit_insn (insn); 2415} 2416 2417/* Prepare the operands for an scc instruction; make sure that the 2418 compare has been done and the result is in T_REG. */ 2419void 2420sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) 2421{ 2422 rtx t_reg = get_t_reg_rtx (); 2423 enum rtx_code oldcode = code; 2424 machine_mode mode; 2425 2426 /* First need a compare insn. */ 2427 switch (code) 2428 { 2429 case NE: 2430 /* It isn't possible to handle this case. 
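	 Callers are expected to canonicalize NE away beforehand, as
	 sh_emit_compare_and_set below does by computing EQ into T and
	 then negating the result with movnegt.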
*/ 2431 gcc_unreachable (); 2432 case LT: 2433 code = GT; 2434 break; 2435 case LE: 2436 code = GE; 2437 break; 2438 case LTU: 2439 code = GTU; 2440 break; 2441 case LEU: 2442 code = GEU; 2443 break; 2444 default: 2445 break; 2446 } 2447 if (code != oldcode) 2448 std::swap (op0, op1); 2449 2450 mode = GET_MODE (op0); 2451 if (mode == VOIDmode) 2452 mode = GET_MODE (op1); 2453 2454 op0 = force_reg (mode, op0); 2455 if ((code != EQ && code != NE 2456 && (op1 != const0_rtx 2457 || code == GTU || code == GEU || code == LTU || code == LEU)) 2458 || (mode == DImode && op1 != const0_rtx) 2459 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2460 op1 = force_reg (mode, op1); 2461 2462 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg, 2463 gen_rtx_fmt_ee (code, SImode, op0, op1)), 2464 mode); 2465} 2466 2467rtx 2468sh_emit_cheap_store_flag (machine_mode mode, enum rtx_code code, 2469 rtx op0, rtx op1) 2470{ 2471 rtx target = gen_reg_rtx (SImode); 2472 rtx tmp; 2473 2474 gcc_assert (TARGET_SHMEDIA); 2475 switch (code) 2476 { 2477 case EQ: 2478 case GT: 2479 case LT: 2480 case UNORDERED: 2481 case GTU: 2482 case LTU: 2483 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1); 2484 emit_insn (gen_cstore4_media (target, tmp, op0, op1)); 2485 code = NE; 2486 break; 2487 2488 case NE: 2489 case GE: 2490 case LE: 2491 case ORDERED: 2492 case GEU: 2493 case LEU: 2494 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1); 2495 emit_insn (gen_cstore4_media (target, tmp, op0, op1)); 2496 code = EQ; 2497 break; 2498 2499 case UNEQ: 2500 case UNGE: 2501 case UNGT: 2502 case UNLE: 2503 case UNLT: 2504 case LTGT: 2505 return NULL_RTX; 2506 2507 default: 2508 gcc_unreachable (); 2509 } 2510 2511 if (mode == DImode) 2512 { 2513 rtx t2 = gen_reg_rtx (DImode); 2514 emit_insn (gen_extendsidi2 (t2, target)); 2515 target = t2; 2516 } 2517 2518 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx); 2519} 2520 2521/* Called from the md file, set up the operands of a compare instruction. */ 2522void 2523sh_emit_compare_and_branch (rtx *operands, machine_mode mode) 2524{ 2525 enum rtx_code code = GET_CODE (operands[0]); 2526 enum rtx_code branch_code; 2527 rtx op0 = operands[1]; 2528 rtx op1 = operands[2]; 2529 rtx insn; 2530 bool need_ccmpeq = false; 2531 2532 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT) 2533 { 2534 op0 = force_reg (mode, op0); 2535 op1 = force_reg (mode, op1); 2536 } 2537 else 2538 { 2539 if (code != EQ || mode == DImode) 2540 { 2541 /* Force args into regs, since we can't use constants here. */ 2542 op0 = force_reg (mode, op0); 2543 if (op1 != const0_rtx || code == GTU || code == GEU) 2544 op1 = force_reg (mode, op1); 2545 } 2546 } 2547 2548 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2549 { 2550 if (code == LT 2551 || (code == LE && TARGET_IEEE && TARGET_SH2E) 2552 || (code == GE && !(TARGET_IEEE && TARGET_SH2E))) 2553 { 2554 std::swap (op0, op1); 2555 code = swap_condition (code); 2556 } 2557 2558 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */ 2559 if (code == GE) 2560 { 2561 gcc_assert (TARGET_IEEE && TARGET_SH2E); 2562 need_ccmpeq = true; 2563 code = GT; 2564 } 2565 2566 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed 2567 to EQ/GT respectively. 
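	 For instance (a != b) is emitted as an EQ comparison plus a
	 branch_false jump, and (a <= b) as a GT comparison with
	 branch_false, so only EQ/GT compare insns are needed here.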
*/ 2568 gcc_assert (code == EQ || code == GT || code == NE || code == LE); 2569 } 2570 2571 switch (code) 2572 { 2573 case EQ: 2574 case GT: 2575 case GE: 2576 case GTU: 2577 case GEU: 2578 branch_code = code; 2579 break; 2580 case NE: 2581 case LT: 2582 case LE: 2583 case LTU: 2584 case LEU: 2585 branch_code = reverse_condition (code); 2586 break; 2587 default: 2588 gcc_unreachable (); 2589 } 2590 2591 insn = gen_rtx_SET (VOIDmode, 2592 get_t_reg_rtx (), 2593 gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); 2594 2595 sh_emit_set_t_insn (insn, mode); 2596 if (need_ccmpeq) 2597 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode); 2598 2599 if (branch_code == code) 2600 emit_jump_insn (gen_branch_true (operands[3])); 2601 else 2602 emit_jump_insn (gen_branch_false (operands[3])); 2603} 2604 2605void 2606sh_emit_compare_and_set (rtx *operands, machine_mode mode) 2607{ 2608 enum rtx_code code = GET_CODE (operands[1]); 2609 rtx op0 = operands[2]; 2610 rtx op1 = operands[3]; 2611 rtx_code_label *lab = NULL; 2612 bool invert = false; 2613 2614 op0 = force_reg (mode, op0); 2615 if ((code != EQ && code != NE 2616 && (op1 != const0_rtx 2617 || code == GTU || code == GEU || code == LTU || code == LEU)) 2618 || (mode == DImode && op1 != const0_rtx) 2619 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) 2620 op1 = force_reg (mode, op1); 2621 2622 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2623 { 2624 if (code == LT || code == LE) 2625 { 2626 std::swap (op0, op1); 2627 code = swap_condition (code); 2628 } 2629 if (code == GE) 2630 { 2631 if (TARGET_IEEE) 2632 { 2633 lab = gen_label_rtx (); 2634 sh_emit_scc_to_t (EQ, op0, op1); 2635 emit_jump_insn (gen_branch_true (lab)); 2636 code = GT; 2637 } 2638 else 2639 { 2640 code = LT; 2641 invert = true; 2642 } 2643 } 2644 } 2645 2646 if (code == NE) 2647 { 2648 code = EQ; 2649 invert = true; 2650 } 2651 2652 sh_emit_scc_to_t (code, op0, op1); 2653 if (lab) 2654 emit_label (lab); 2655 if (invert) 2656 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); 2657 else 2658 emit_move_insn (operands[0], get_t_reg_rtx ()); 2659} 2660 2661/* Functions to output assembly code. */ 2662 2663/* Return a sequence of instructions to perform DI or DF move. 2664 2665 Since the SH cannot move a DI or DF in one instruction, we have 2666 to take care when we see overlapping source and dest registers. */ 2667const char * 2668output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], 2669 machine_mode mode) 2670{ 2671 rtx dst = operands[0]; 2672 rtx src = operands[1]; 2673 2674 if (MEM_P (dst) 2675 && GET_CODE (XEXP (dst, 0)) == PRE_DEC) 2676 return "mov.l %T1,%0" "\n" 2677 " mov.l %1,%0"; 2678 2679 if (register_operand (dst, mode) 2680 && register_operand (src, mode)) 2681 { 2682 if (REGNO (src) == MACH_REG) 2683 return "sts mach,%S0" "\n" 2684 " sts macl,%R0"; 2685 2686 /* When mov.d r1,r2 do r2->r3 then r1->r2; 2687 when mov.d r1,r0 do r1->r0 then r2->r1. 
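	 E.g. for mov.d r1,r2 the source occupies r1/r2 and the
	 destination r2/r3; moving r1->r2 first would clobber the not
	 yet copied source half in r2, so the second halves are moved
	 first (r2->r3).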
*/ 2688 if (REGNO (src) + 1 == REGNO (dst)) 2689 return "mov %T1,%T0" "\n" 2690 " mov %1,%0"; 2691 else 2692 return "mov %1,%0" "\n" 2693 " mov %T1,%T0"; 2694 } 2695 else if (CONST_INT_P (src)) 2696 { 2697 if (INTVAL (src) < 0) 2698 output_asm_insn ("mov #-1,%S0", operands); 2699 else 2700 output_asm_insn ("mov #0,%S0", operands); 2701 2702 return "mov %1,%R0"; 2703 } 2704 else if (MEM_P (src)) 2705 { 2706 int ptrreg = -1; 2707 int dreg = REGNO (dst); 2708 rtx inside = XEXP (src, 0); 2709 2710 switch (GET_CODE (inside)) 2711 { 2712 case REG: 2713 ptrreg = REGNO (inside); 2714 break; 2715 2716 case SUBREG: 2717 ptrreg = subreg_regno (inside); 2718 break; 2719 2720 case PLUS: 2721 ptrreg = REGNO (XEXP (inside, 0)); 2722 /* ??? A r0+REG address shouldn't be possible here, because it isn't 2723 an offsettable address. Unfortunately, offsettable addresses use 2724 QImode to check the offset, and a QImode offsettable address 2725 requires r0 for the other operand, which is not currently 2726 supported, so we can't use the 'o' constraint. 2727 Thus we must check for and handle r0+REG addresses here. 2728 We punt for now, since this is likely very rare. */ 2729 gcc_assert (!REG_P (XEXP (inside, 1))); 2730 break; 2731 2732 case LABEL_REF: 2733 return "mov.l %1,%0" "\n" 2734 " mov.l %1+4,%T0"; 2735 case POST_INC: 2736 return "mov.l %1,%0" "\n" 2737 " mov.l %1,%T0"; 2738 default: 2739 gcc_unreachable (); 2740 } 2741 2742 /* Work out the safe way to copy. Copy into the second half first. */ 2743 if (dreg == ptrreg) 2744 return "mov.l %T1,%T0" "\n" 2745 " mov.l %1,%0"; 2746 } 2747 2748 return "mov.l %1,%0" "\n" 2749 " mov.l %T1,%T0"; 2750} 2751 2752/* Print an instruction which would have gone into a delay slot after 2753 another instruction, but couldn't because the other instruction expanded 2754 into a sequence where putting the slot insn at the end wouldn't work. */ 2755static void 2756print_slot (rtx_sequence *seq) 2757{ 2758 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL); 2759 2760 seq->insn (1)->set_deleted (); 2761} 2762 2763const char * 2764output_far_jump (rtx_insn *insn, rtx op) 2765{ 2766 struct { rtx lab, reg, op; } this_jmp; 2767 rtx_code_label *braf_base_lab = NULL; 2768 const char *jump; 2769 int far; 2770 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); 2771 rtx_insn *prev; 2772 2773 this_jmp.lab = gen_label_rtx (); 2774 2775 if (TARGET_SH2 2776 && offset >= -32764 2777 && offset - get_attr_length (insn) <= 32766) 2778 { 2779 far = 0; 2780 jump = "mov.w %O0,%1" "\n" 2781 " braf %1"; 2782 } 2783 else 2784 { 2785 far = 1; 2786 if (flag_pic) 2787 { 2788 if (TARGET_SH2) 2789 jump = "mov.l %O0,%1" "\n" 2790 " braf %1"; 2791 else 2792 jump = "mov.l r0,@-r15" "\n" 2793 " mova %O0,r0" "\n" 2794 " mov.l @r0,%1" "\n" 2795 " add r0,%1" "\n" 2796 " mov.l @r15+,r0" "\n" 2797 " jmp @%1"; 2798 } 2799 else 2800 jump = "mov.l %O0,%1" "\n" 2801 " jmp @%1"; 2802 } 2803 /* If we have a scratch register available, use it. */ 2804 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn))) 2805 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 2806 { 2807 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); 2808 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! 
TARGET_SH2) 2809 jump = "mov.l r1,@-r15" "\n" 2810 " mova %O0,r0" "\n" 2811 " mov.l @r0,r1" "\n" 2812 " add r1,r0" "\n" 2813 " mov.l @r15+,r1" "\n" 2814 " jmp @%1"; 2815 output_asm_insn (jump, &this_jmp.lab); 2816 if (dbr_sequence_length ()) 2817 print_slot (final_sequence); 2818 else 2819 output_asm_insn ("nop", 0); 2820 } 2821 else 2822 { 2823 /* Output the delay slot insn first if any. */ 2824 if (dbr_sequence_length ()) 2825 print_slot (final_sequence); 2826 2827 this_jmp.reg = gen_rtx_REG (SImode, 13); 2828 /* We must keep the stack aligned to 8-byte boundaries on SH5. 2829 Fortunately, MACL is fixed and call-clobbered, and we never 2830 need its value across jumps, so save r13 in it instead of in 2831 the stack. */ 2832 if (TARGET_SH5) 2833 output_asm_insn ("lds r13,macl", 0); 2834 else 2835 output_asm_insn ("mov.l r13,@-r15", 0); 2836 output_asm_insn (jump, &this_jmp.lab); 2837 if (TARGET_SH5) 2838 output_asm_insn ("sts macl,r13", 0); 2839 else 2840 output_asm_insn ("mov.l @r15+,r13", 0); 2841 } 2842 if (far && flag_pic && TARGET_SH2) 2843 { 2844 braf_base_lab = gen_label_rtx (); 2845 (*targetm.asm_out.internal_label) (asm_out_file, "L", 2846 CODE_LABEL_NUMBER (braf_base_lab)); 2847 } 2848 if (far) 2849 output_asm_insn (".align 2", 0); 2850 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab)); 2851 this_jmp.op = op; 2852 if (far && flag_pic) 2853 { 2854 if (TARGET_SH2) 2855 this_jmp.lab = braf_base_lab; 2856 output_asm_insn (".long %O2-%O0", &this_jmp.lab); 2857 } 2858 else 2859 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab); 2860 return ""; 2861} 2862 2863/* Local label counter, used for constants in the pool and inside 2864 pattern branches. */ 2865static int lf = 100; 2866 2867/* Output code for ordinary branches. */ 2868const char * 2869output_branch (int logic, rtx_insn *insn, rtx *operands) 2870{ 2871 switch (get_attr_length (insn)) 2872 { 2873 case 6: 2874 /* This can happen if filling the delay slot has caused a forward 2875 branch to exceed its range (we could reverse it, but only 2876 when we know we won't overextend other branches; this should 2877 best be handled by relaxation). 2878 It can also happen when other condbranches hoist delay slot insn 2879 from their destination, thus leading to code size increase. 2880 But the branch will still be in the range -4092..+4098 bytes. */ 2881 if (! TARGET_RELAX) 2882 { 2883 int label = lf++; 2884 /* The call to print_slot will clobber the operands. */ 2885 rtx op0 = operands[0]; 2886 2887 /* If the instruction in the delay slot is annulled (true), then 2888 there is no delay slot where we can put it now. The only safe 2889 place for it is after the label. final will do that by default. */ 2890 2891 if (final_sequence 2892 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0)) 2893 && get_attr_length (final_sequence->insn (1))) 2894 { 2895 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", 2896 ASSEMBLER_DIALECT ? "/" : ".", label); 2897 print_slot (final_sequence); 2898 } 2899 else 2900 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); 2901 2902 output_asm_insn ("bra\t%l0", &op0); 2903 fprintf (asm_out_file, "\tnop\n"); 2904 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2905 2906 return ""; 2907 } 2908 /* When relaxing, handle this like a short branch. The linker 2909 will fix it up if it still doesn't fit after relaxation. */ 2910 case 2: 2911 return logic ? 
"bt%.\t%l0" : "bf%.\t%l0"; 2912 2913 /* These are for SH2e, in which we have to account for the 2914 extra nop because of the hardware bug in annulled branches. */ 2915 case 8: 2916 if (! TARGET_RELAX) 2917 { 2918 int label = lf++; 2919 2920 gcc_assert (!final_sequence 2921 || !(INSN_ANNULLED_BRANCH_P 2922 (XVECEXP (final_sequence, 0, 0)))); 2923 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", 2924 logic ? "f" : "t", 2925 ASSEMBLER_DIALECT ? "/" : ".", label); 2926 fprintf (asm_out_file, "\tnop\n"); 2927 output_asm_insn ("bra\t%l0", operands); 2928 fprintf (asm_out_file, "\tnop\n"); 2929 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); 2930 2931 return ""; 2932 } 2933 /* When relaxing, fall through. */ 2934 case 4: 2935 { 2936 char buffer[10]; 2937 2938 sprintf (buffer, "b%s%ss\t%%l0", 2939 logic ? "t" : "f", 2940 ASSEMBLER_DIALECT ? "/" : "."); 2941 output_asm_insn (buffer, &operands[0]); 2942 return "nop"; 2943 } 2944 2945 default: 2946 /* There should be no longer branches now - that would 2947 indicate that something has destroyed the branches set 2948 up in machine_dependent_reorg. */ 2949 gcc_unreachable (); 2950 } 2951} 2952 2953/* Output a code sequence for INSN using TEMPL with OPERANDS; but before, 2954 fill in operands 9 as a label to the successor insn. 2955 We try to use jump threading where possible. 2956 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump, 2957 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means 2958 follow jmp and bt, if the address is in range. */ 2959const char * 2960output_branchy_insn (enum rtx_code code, const char *templ, 2961 rtx_insn *insn, rtx *operands) 2962{ 2963 rtx_insn *next_insn = NEXT_INSN (insn); 2964 2965 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn)) 2966 { 2967 rtx src = SET_SRC (PATTERN (next_insn)); 2968 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) 2969 { 2970 /* Following branch not taken */ 2971 rtx_code_label *lab = gen_label_rtx (); 2972 emit_label_after (lab, next_insn); 2973 INSN_ADDRESSES_NEW (lab, 2974 INSN_ADDRESSES (INSN_UID (next_insn)) 2975 + get_attr_length (next_insn)); 2976 operands[9] = lab; 2977 return templ; 2978 } 2979 else 2980 { 2981 int offset = (branch_dest (next_insn) 2982 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); 2983 if (offset >= -252 && offset <= 258) 2984 { 2985 if (GET_CODE (src) == IF_THEN_ELSE) 2986 /* branch_true */ 2987 src = XEXP (src, 1); 2988 operands[9] = src; 2989 return templ; 2990 } 2991 } 2992 } 2993 rtx_code_label *lab = gen_label_rtx (); 2994 emit_label_after (lab, insn); 2995 INSN_ADDRESSES_NEW (lab, 2996 INSN_ADDRESSES (INSN_UID (insn)) 2997 + get_attr_length (insn)); 2998 operands[9] = lab; 2999 return templ; 3000} 3001 3002const char * 3003output_ieee_ccmpeq (rtx_insn *insn, rtx *operands) 3004{ 3005 return output_branchy_insn (NE, "bt %l9" "\n" 3006 " fcmp/eq %1,%0", 3007 insn, operands); 3008} 3009 3010/* Output the start of the assembler file. */ 3011static void 3012sh_file_start (void) 3013{ 3014 default_file_start (); 3015 3016 if (TARGET_ELF) 3017 /* We need to show the text section with the proper 3018 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out 3019 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS 3020 will complain. We can teach GAS specifically about the 3021 default attributes for our choice of text section, but 3022 then we would have to change GAS again if/when we change 3023 the text section name. 
*/ 3024 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); 3025 else 3026 /* Switch to the data section so that the coffsem symbol 3027 isn't in the text section. */ 3028 switch_to_section (data_section); 3029 3030 if (TARGET_LITTLE_ENDIAN) 3031 fputs ("\t.little\n", asm_out_file); 3032 3033 if (!TARGET_ELF) 3034 { 3035 if (TARGET_SHCOMPACT) 3036 fputs ("\t.mode\tSHcompact\n", asm_out_file); 3037 else if (TARGET_SHMEDIA) 3038 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n", 3039 TARGET_SHMEDIA64 ? 64 : 32); 3040 } 3041} 3042 3043/* Check if PAT includes UNSPEC_CALLER unspec pattern. */ 3044static bool 3045unspec_caller_rtx_p (rtx pat) 3046{ 3047 rtx base, offset; 3048 int i; 3049 3050 split_const (pat, &base, &offset); 3051 if (GET_CODE (base) == UNSPEC) 3052 { 3053 if (XINT (base, 1) == UNSPEC_CALLER) 3054 return true; 3055 for (i = 0; i < XVECLEN (base, 0); i++) 3056 if (unspec_caller_rtx_p (XVECEXP (base, 0, i))) 3057 return true; 3058 } 3059 return false; 3060} 3061 3062/* Indicate that INSN cannot be duplicated. This is true for insn 3063 that generates a unique label. */ 3064static bool 3065sh_cannot_copy_insn_p (rtx_insn *insn) 3066{ 3067 rtx pat; 3068 3069 if (!reload_completed || !flag_pic) 3070 return false; 3071 3072 if (!NONJUMP_INSN_P (insn)) 3073 return false; 3074 if (asm_noperands (insn) >= 0) 3075 return false; 3076 3077 pat = PATTERN (insn); 3078 if (GET_CODE (pat) != SET) 3079 return false; 3080 pat = SET_SRC (pat); 3081 3082 if (unspec_caller_rtx_p (pat)) 3083 return true; 3084 3085 return false; 3086} 3087 3088/* Number of instructions used to make an arithmetic right shift by N. */ 3089static const char ashiftrt_insns[] = 3090 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; 3091 3092/* Description of a logical left or right shift, when expanded to a sequence 3093 of 1/2/8/16 shifts. 3094 Notice that one bit right shifts clobber the T bit. One bit left shifts 3095 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */ 3096enum 3097{ 3098 ASHL_CLOBBERS_T = 1 << 0, 3099 LSHR_CLOBBERS_T = 1 << 1 3100}; 3101 3102struct ashl_lshr_sequence 3103{ 3104 char insn_count; 3105 signed char amount[6]; 3106 char clobbers_t; 3107}; 3108 3109static const struct ashl_lshr_sequence ashl_lshr_seq[32] = 3110{ 3111 { 0, { 0 }, 0 }, // 0 3112 { 1, { 1 }, LSHR_CLOBBERS_T }, 3113 { 1, { 2 }, 0 }, 3114 { 2, { 2, 1 }, LSHR_CLOBBERS_T }, 3115 { 2, { 2, 2 }, 0 }, // 4 3116 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, 3117 { 3, { 2, 2, 2 }, 0 }, 3118 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, 3119 { 1, { 8 }, 0 }, // 8 3120 { 2, { 8, 1 }, LSHR_CLOBBERS_T }, 3121 { 2, { 8, 2 }, 0 }, 3122 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, 3123 { 3, { 8, 2, 2 }, 0 }, // 12 3124 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, 3125 { 3, { 8, -2, 8 }, 0 }, 3126 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, 3127 { 1, { 16 }, 0 }, // 16 3128 { 2, { 16, 1 }, LSHR_CLOBBERS_T }, 3129 { 2, { 16, 2 }, 0 }, 3130 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, 3131 { 3, { 16, 2, 2 }, 0 }, // 20 3132 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, 3133 { 3, { 16, -2, 8 }, 0 }, 3134 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, 3135 { 2, { 16, 8 }, 0 }, // 24 3136 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, 3137 { 3, { 16, 8, 2 }, 0 }, 3138 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, 3139 { 4, { 16, 8, 2, 2 }, 0 }, // 28 3140 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, 3141 { 3, { 16, -2, 16 }, 0 }, 3142 3143 /* For a right shift by 31 a 2 insn shll-movt sequence can be used. 
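   That sequence is, roughly:
	shll	rn	! T = former bit 31, rn <<= 1
	movt	rn	! rn = T
   which leaves rn equal to the original value logically shifted
   right by 31.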
   For a left shift by 31 a 2 insn and-rotl sequence can be used.
   However, the shift-and combiner code needs this entry here to be in
   terms of real shift insns.  */
  { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
};

/* Individual shift amounts for shift amounts < 16, where up to the three
   highmost bits might be clobbered.  This is typically used when combined
   with some kind of sign or zero extension.  */
static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
{
  { 0, { 0 }, 0 },			// 0
  { 1, { 1 }, LSHR_CLOBBERS_T },
  { 1, { 2 }, 0 },
  { 2, { 2, 1 }, LSHR_CLOBBERS_T },
  { 2, { 2, 2 }, 0 },			// 4
  { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
  { 2, { 8, -2 }, 0 },
  { 2, { 8, -1 }, ASHL_CLOBBERS_T },
  { 1, { 8 }, 0 },			// 8
  { 2, { 8, 1 }, LSHR_CLOBBERS_T },
  { 2, { 8, 2 }, 0 },
  { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
  { 3, { 8, 2, 2 }, 0 },		// 12
  { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
  { 2, { 16, -2 }, 0 },
  { 2, { 16, -1 }, ASHL_CLOBBERS_T },
  { 1, { 16 }, 0 },			// 16
  { 2, { 16, 1 }, LSHR_CLOBBERS_T },
  { 2, { 16, 2 }, 0 },
  { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
  { 3, { 16, 2, 2 }, 0 },		// 20
  { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
  { 3, { 16, -2, 8 }, 0 },
  { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
  { 2, { 16, 8 }, 0 },			// 24
  { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
  { 3, { 16, 8, 2 }, 0 },
  { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
  { 4, { 16, 8, 2, 2 }, 0 },		// 28
  { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
  { 3, { 16, -2, 16 }, 0 },
  { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
};

/* Return true if a shift left consisting of 1/2/8/16 shift instructions
   will clobber the T bit.  */
bool
sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
{
  gcc_assert (CONST_INT_P (shift_amount));

  const int shift_amount_i = INTVAL (shift_amount) & 31;

  /* Special case for shift count of 31: use and-rotl sequence.  */
  if (shift_amount_i == 31)
    return true;

  return (ashl_lshr_seq[shift_amount_i].clobbers_t
	  & ASHL_CLOBBERS_T) != 0;
}

/* Return true if a logical right shift consisting of 1/2/8/16 shift
   instructions will clobber the T bit.  */
bool
sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
{
  gcc_assert (CONST_INT_P (shift_amount));

  const int shift_amount_i = INTVAL (shift_amount) & 31;

  /* Special case for shift count of 31: use shll-movt sequence.  */
  if (shift_amount_i == 31)
    return true;

  return (ashl_lshr_seq[shift_amount_i].clobbers_t
	  & LSHR_CLOBBERS_T) != 0;
}

/* Return true if it is potentially beneficial to use a dynamic shift
   instruction (shad / shar) instead of a combination of 1/2/8/16
   shift instructions for the specified shift count.
   If dynamic shifts are not available, always return false.  */
bool
sh_dynamicalize_shift_p (rtx count)
{
  gcc_assert (CONST_INT_P (count));

  const int shift_amount_i = INTVAL (count) & 31;
  int insn_count;

  /* For left and right shifts, there are shorter 2 insn sequences for
     shift amounts of 31.
*/ 3237 if (shift_amount_i == 31) 3238 insn_count = 2; 3239 else 3240 insn_count = ashl_lshr_seq[shift_amount_i].insn_count; 3241 3242 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST); 3243} 3244 3245/* Assuming we have a value that has been sign-extended by at least one bit, 3246 can we use the ext_shift_amounts with the last shift turned to an 3247 arithmetic shift to shift it by N without data loss, and quicker than by 3248 other means? */ 3249#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) 3250 3251/* Return the cost of a shift. */ 3252static inline int 3253shiftcosts (rtx x) 3254{ 3255 int value; 3256 3257 if (TARGET_SHMEDIA) 3258 return 1; 3259 3260 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 3261 { 3262 if (GET_MODE (x) == DImode 3263 && CONST_INT_P (XEXP (x, 1)) 3264 && INTVAL (XEXP (x, 1)) == 1) 3265 return 2; 3266 3267 /* Everything else is invalid, because there is no pattern for it. */ 3268 return -1; 3269 } 3270 /* If shift by a non constant, then this will be expensive. */ 3271 if (!CONST_INT_P (XEXP (x, 1))) 3272 return SH_DYNAMIC_SHIFT_COST; 3273 3274 /* Otherwise, return the true cost in instructions. Cope with out of range 3275 shift counts more or less arbitrarily. */ 3276 value = INTVAL (XEXP (x, 1)) & 31; 3277 3278 if (GET_CODE (x) == ASHIFTRT) 3279 { 3280 int cost = ashiftrt_insns[value]; 3281 /* If dynamic shifts are available and profitable in this case, then we 3282 put the constant in a reg and use shad. */ 3283 if (cost > 1 + SH_DYNAMIC_SHIFT_COST) 3284 cost = 1 + SH_DYNAMIC_SHIFT_COST; 3285 return cost; 3286 } 3287 else 3288 return ashl_lshr_seq[value].insn_count; 3289} 3290 3291/* Return the cost of an AND/XOR/IOR operation. */ 3292static inline int 3293and_xor_ior_costs (rtx x, int code) 3294{ 3295 /* On SH1-4 we have only max. SImode operations. 3296 Double the cost for modes > SImode. */ 3297 const int cost_scale = !TARGET_SHMEDIA 3298 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD 3299 ? 2 : 1; 3300 3301 /* A logical operation with two registers is a single cycle 3302 instruction. */ 3303 if (!CONST_INT_P (XEXP (x, 1))) 3304 return 1 * cost_scale; 3305 3306 int i = INTVAL (XEXP (x, 1)); 3307 3308 if (TARGET_SHMEDIA) 3309 { 3310 if (satisfies_constraint_I10 (XEXP (x, 1)) 3311 || satisfies_constraint_J16 (XEXP (x, 1))) 3312 return 1; 3313 else 3314 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size); 3315 } 3316 3317 /* These constants are single cycle extu.[bw] instructions. */ 3318 if ((i == 0xff || i == 0xffff) && code == AND) 3319 return 1 * cost_scale; 3320 /* Constants that can be used in an instruction as an immediate are 3321 a single cycle, but this requires r0, so make it a little more 3322 expensive. */ 3323 if (CONST_OK_FOR_K08 (i)) 3324 return 2 * cost_scale; 3325 /* Constants that can be loaded with a mov immediate need one more cycle. 3326 This case is probably unnecessary. */ 3327 if (CONST_OK_FOR_I08 (i)) 3328 return 2 * cost_scale; 3329 /* Any other constant requires an additional 2 cycle pc-relative load. 3330 This case is probably unnecessary. */ 3331 return 3 * cost_scale; 3332} 3333 3334/* Return the cost of an addition or a subtraction. */ 3335static inline int 3336addsubcosts (rtx x) 3337{ 3338 if (GET_MODE (x) == SImode) 3339 { 3340 /* The addc or subc patterns will eventually become one or two 3341 instructions. Below are some costs for some of the patterns 3342 which combine would reject because the costs of the individual 3343 insns in the patterns are lower. 
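     For example, the (plus (and (reg) (const_int 1)) (plus ...)) shape
     accepted below appears to be one of the addc forms combine builds;
     pricing it as a single insn keeps combine from rejecting it.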

     FIXME: It would be much easier if we had something like insn cost
     attributes and the cost calculation machinery used those attributes
     in the first place.  This would eliminate redundant recog-like C
     code to calculate costs of complex patterns.  */
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);

      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (op0) == AND
	      && XEXP (op0, 1) == const1_rtx
	      && (GET_CODE (op1) == PLUS
		  || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
	    return 1;

	  if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
	      && GET_CODE (op1) == LSHIFTRT
	      && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
	    return 1;
	}
      /* Let's assume that adding the result of an insn that stores into
	 the T bit is cheap.  */
      if (treg_set_expr (op1, SImode))
	return 1;
      if (treg_set_expr (op0, SImode))
	return 1;
    }

  /* On SH1-4 we have only max. SImode operations.
     Double the cost for modes > SImode.  */
  const int cost_scale = !TARGET_SHMEDIA
			 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
			 ? 2 : 1;

  /* Adding a register is a single cycle insn.  */
  if (REG_P (XEXP (x, 1))
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1 * cost_scale;

  /* Likewise for small constants.  */
  if (CONST_INT_P (XEXP (x, 1))
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1 * cost_scale;

  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
	return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  return 2;
	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  return 3;
	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
	  return 4;

	/* Fall through.  */
      default:
	return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3 * cost_scale;
}

/* Return the cost of a multiply.  */
static inline int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (sh_multcost >= 0)
    return sh_multcost;
  if (TARGET_SHMEDIA)
    /* ??? We have a mul insn, but it has a latency of three, and doesn't
       accept constants.  Ideally, we would use a cost of one or two and
       add the cost of the operand, but disregard the latter when inside loops
       and loop invariant code motion is still to follow.
       Using a multiply first and splitting it later if it's a loss
       doesn't work because of different sign / zero extension semantics
       of multiplies vs. shifts.  */
    return optimize_size ? 2 : 3;

  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
	 read of the mac reg, but count more because of the latency and extra
	 reg usage.  */
      if (optimize_size)
	return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (optimize_size)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling too.  */
  return 20;
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.
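   Note that the cases below are not consistent about units: some set
   *TOTAL in COSTS_N_INSNS units while others (e.g. the CONST_INT
   weights) use small raw values, so the magnitudes are not directly
   comparable across codes.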
*/
static bool
sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
	      int *total, bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
      /* The lower-subreg pass decides whether to split multi-word regs
	 into individual regs by looking at the cost for a SET of certain
	 modes with the following patterns:
	   (set (reg) (reg))
	   (set (reg) (const_int 0))
	 On machines that support vector-move operations a multi-word move
	 is the same cost as individual reg move.  On SH there is no
	 vector-move, so we have to provide the correct cost in the number
	 of move insns to load/store the reg of the mode in question.  */
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && (register_operand (SET_SRC (x), VOIDmode)
	      || satisfies_constraint_Z (SET_SRC (x))))
	{
	  const machine_mode mode = GET_MODE (SET_DEST (x));
	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
				  / mov_insn_size (mode, TARGET_SH2A));
	  return true;
	}
      return false;

    /* The cost of a mem access is mainly the cost of the address mode.  */
    case MEM:
      *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
				true);
      return true;

    case IF_THEN_ELSE:
      /* This case is required for the if_then_else negc pattern.  */
      if (treg_set_expr (XEXP (x, 0), SImode))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    /* Zero extracts of single bits are usually combine patterns for the
       tst insns.  */
    case ZERO_EXTRACT:
      if (GET_CODE (XEXP (x, 0)) == XOR
	  && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
	  && XEXP (x, 1) == const1_rtx
	  && CONST_INT_P (XEXP (x, 2))
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  /* Check that the xor constant overlaps with the extracted bit.  */
	  && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
	{
	  *total = 1; //COSTS_N_INSNS (1);
	  return true;
	}
      return false;

    /* The cost of a sign or zero extend depends on whether the source is a
       reg or a mem.  In case of a mem take the address into account.  */
    case SIGN_EXTEND:
      if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      if (MEM_P (XEXP (x, 0)))
	{
	  *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
				    GET_MODE (XEXP (x, 0)),
				    MEM_ADDR_SPACE (XEXP (x, 0)), true);
	  return true;
	}
      return false;

    case ZERO_EXTEND:
      if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
	       && (GET_MODE (XEXP (x, 0)) == QImode
		   || GET_MODE (XEXP (x, 0)) == HImode))
	{
	  /* Handle SH2A's movu.b and movu.w insn.  */
	  *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
				    GET_MODE (XEXP (x, 0)),
				    MEM_ADDR_SPACE (XEXP (x, 0)), true);
	  return true;
	}
      return false;

    /* mems for SFmode and DFmode can be inside a parallel due to
       the way the fpscr is handled.
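       Such a move typically has a shape along the lines of
	 (parallel [(set (mem:SF ...) (reg:SF ...))
		    (use (reg:SI FPSCR_MODES_REG))])
       similar to the parallels built in sh_emit_set_t_insn above.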
    case PARALLEL:
      for (int i = 0; i < XVECLEN (x, 0); i++)
	{
	  rtx xx = XVECEXP (x, 0, i);
	  if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
	    {
	      *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
					GET_MODE (XEXP (xx, 0)),
					MEM_ADDR_SPACE (XEXP (xx, 0)), true);
	      return true;
	    }
	  if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
	    {
	      *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
					GET_MODE (XEXP (xx, 1)),
					MEM_ADDR_SPACE (XEXP (xx, 1)), true);
	      return true;
	    }
	}

      if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      else if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      else
	*total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;

    case CONST_INT:
      if (TARGET_SHMEDIA)
	{
	  if (INTVAL (x) == 0)
	    *total = 0;
	  else if (outer_code == AND && and_operand ((x), DImode))
	    *total = 0;
	  else if ((outer_code == IOR || outer_code == XOR
		    || outer_code == PLUS)
		   && CONST_OK_FOR_I10 (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_I16 (INTVAL (x)))
	    *total = COSTS_N_INSNS (outer_code != SET);
	  else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	    *total = COSTS_N_INSNS ((outer_code != SET) + 1);
	  else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	    *total = COSTS_N_INSNS ((outer_code != SET) + 2);
	  else
	    *total = COSTS_N_INSNS ((outer_code != SET) + 3);
	  return true;
	}
      if (CONST_OK_FOR_I08 (INTVAL (x)))
	*total = 0;
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))
	*total = 1;
      /* prepare_cmp_insn will force costly constants into registers before
	 the cbranch[sd]i4 patterns can see them, so preserve potentially
	 interesting ones not covered by I08 above.  */
      else if (outer_code == COMPARE
	       && ((unsigned HOST_WIDE_INT) INTVAL (x)
		    == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
		    || INTVAL (x) == 0x7fffffff
		    || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
	*total = 1;
      else
	*total = 8;
      return true;

    case EQ:
      /* An and with a constant compared against zero is
	 most likely going to be a TST #imm, R0 instruction.
	 Notice that this does not catch the zero_extract variants from
	 the md file.  */
      if (XEXP (x, 1) == const0_rtx
	  && (GET_CODE (XEXP (x, 0)) == AND
	      || (SUBREG_P (XEXP (x, 0))
		  && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)))
	{
	  *total = 1;
	  return true;
	}

      else if (XEXP (x, 1) == const0_rtx
	       && GET_CODE (XEXP (x, 0)) == AND
	       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
	       && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
	       && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	{
	  *total = 1;
	  return true;
	}
      else
	return false;

    case SMIN:
    case SMAX:
      /* This is most likely a clips.b or clips.w insn that is being made up
	 by combine.  */
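      /* E.g. (smin:SI (smax:SI (reg) (const_int -128)) (const_int 127))
	 clamps to the signed 8 bit range of clips.b.  The check below
	 only tests the general shape; the exact bounds are left to the
	 insn predicates.  */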
      if (TARGET_SH2A
	  && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && REG_P (XEXP (XEXP (x, 0), 0))
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      if (TARGET_SHMEDIA)
	*total = COSTS_N_INSNS (4);
      /* prepare_cmp_insn will force costly constants into registers before
	 the cbranchdi4 pattern can see them, so preserve potentially
	 interesting ones.  */
      else if (outer_code == COMPARE && GET_MODE (x) == DImode)
	*total = 1;
      else
	*total = 10;
      return true;

    case CONST_VECTOR:
      /* FIXME: This looks broken.  Only the last statement has any effect.
	 Probably this could be folded with the PARALLEL case?  */
      if (x == CONST0_RTX (GET_MODE (x)))
	*total = 0;
      else if (sh_1el_vec (x, VOIDmode))
	*total = outer_code != SET;
      if (sh_rep_vec (x, VOIDmode))
	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
		  + (outer_code != SET));
      *total = COSTS_N_INSNS (3) + (outer_code != SET);
      return true;

    case PLUS:
    case MINUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      /* Check for (and (not (reg)) (const_int 1)) which is a tst insn.  */
      if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* Fall through.  */

    case XOR:
    case IOR:
      *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case LT:
    case GE:
      /* div0s sign comparison.  */
      if (GET_CODE (XEXP (x, 0)) == XOR
	  && REG_P ((XEXP (XEXP (x, 0), 0)))
	  && REG_P ((XEXP (XEXP (x, 0), 1)))
	  && satisfies_constraint_Z (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    case LSHIFTRT:
      /* div0s sign comparison.  */
      if (GET_CODE (XEXP (x, 0)) == XOR
	  && REG_P ((XEXP (XEXP (x, 0), 0)))
	  && REG_P ((XEXP (XEXP (x, 0), 1)))
	  && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* Fall through to shiftcosts.  */
    case ASHIFT:
    case ASHIFTRT:
      {
	int cost = shiftcosts (x);
	if (cost < 0)
	  return false;
	*total = COSTS_N_INSNS (cost);
	return true;
      }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case FLOAT:
    case FIX:
      *total = 100;
      return true;

    default:
      return false;
    }
}

/* Determine the size of the fundamental move insn that will be used
   for the specified mode.  */
static inline int
mov_insn_size (machine_mode mode, bool consider_sh2a)
{
  const int mode_sz = GET_MODE_SIZE (mode);

  if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
      || (TARGET_FMOVD && mode == DFmode))
    return mode_sz;
  else
    {
      /* The max. available mode for actual move insns is SImode.
	 Larger accesses will be split into multiple loads/stores.  */
      const int max_mov_sz = GET_MODE_SIZE (SImode);
      return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
    }
}

/* Determine the maximum possible displacement for a move insn for the
   specified mode.  */
int
sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
{
  /* The 4 byte displacement move insns are the same as the 2 byte
     versions but take a 12 bit displacement.  All we need to do is to
     scale the max. displacement value accordingly.  */
  const int disp_scale = consider_sh2a ? (4095 / 15) : 1;

  /* SH2A supports FPU move insns with 12 bit displacements.
     Other variants do not support any kind of displacements for
     FPU move insns.  */
  if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return 0;
  else
    {
      const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
      const int mode_sz = GET_MODE_SIZE (mode);
      int r = 15 * mov_insn_sz * disp_scale;

      /* If the mov insn will be split into multiple loads/stores, the
	 maximum possible displacement is a bit smaller.  */
      if (mode_sz > mov_insn_sz)
	r -= mode_sz - mov_insn_sz;
      return r;
    }
}

/* Determine the alignment mask for a move insn of the
   specified mode.  */
static inline int
mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
{
  const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
  return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
}

/* Return the displacement value of a displacement address.  */
HOST_WIDE_INT
sh_disp_addr_displacement (rtx x)
{
  gcc_assert (satisfies_constraint_Sdd (x));
  return INTVAL (XEXP (XEXP (x, 0), 1));
}

/* Compute the cost of an address.  */
static int
sh_address_cost (rtx x, machine_mode mode,
		 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  /* 'GBR + 0'.  Account one more because of R0 restriction.  */
  if (REG_P (x) && REGNO (x) == GBR_REG)
    return 2;

  /* Simple reg, post-inc, pre-dec addressing.  */
  if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
    return 1;

  /* 'reg + disp' addressing.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
    {
      /* 'GBR + disp'.  Account one more because of R0 restriction.  */
      if (REGNO (XEXP (x, 0)) == GBR_REG
	  && gbr_displacement (XEXP (x, 1), mode))
	return 2;

      const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));

      if (offset == 0)
	return 1;

      /* The displacement would fit into a 2 byte move insn.
	 HImode and QImode loads/stores with displacement put pressure on
	 R0 which will most likely require another reg copy.  Thus account
	 a higher cost for that.  */
      if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
	return (mode == HImode || mode == QImode) ? 2 : 1;

      /* The displacement would fit into a 4 byte move insn (SH2A).  */
      if (TARGET_SH2A
	  && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
	return 2;

      /* The displacement is probably out of range and will require extra
	 calculations.  */
      return 3;
    }

  /* 'reg + reg' addressing.  Account a slightly higher cost because of
     increased pressure on R0.  */
  if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
      && ! TARGET_SHMEDIA)
    return 3;

  /* Not sure what it is - probably expensive.  */
  return 10;
}

/* Code to expand a shift.  */
static void
gen_ashift (int type, int n, rtx reg)
{
  rtx n_rtx;

  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  n_rtx = GEN_INT (n);
  gcc_assert (satisfies_constraint_P27 (n_rtx));

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
      break;
    case LSHIFTRT:
      if (n == 1)
	emit_insn (gen_shlr (reg, reg));
      else
	emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Code to expand a HImode shift.  */
static void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */
void
gen_shifty_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 31;

  if (value == 31)
    {
      if (code == LSHIFTRT)
	{
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen even when optimizing, if there were subregs before
	 reload.  Don't output a nop here, as this is never optimized away;
	 use a no-op move instead.  */
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
      return;
    }

  max = ashl_lshr_seq[value].insn_count;
  for (i = 0; i < max; i++)
    gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
}

/* Same as gen_shifty_op, but optimized for values where the topmost bits
   don't matter.  */
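/* The ext_ashl_lshr_seq sequences used here may over-shift and then
   compensate with a negative amount, which gen_ashift / gen_ashift_hi
   above translate into a shift in the opposite direction.  */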
void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_ashl_lshr_seq[value].insn_count;
      for (i = 0; i < max; i++)
	gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
      gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
}

/* Output RTL for an arithmetic right shift.
   ??? Rewrite to use super-optimizer sequences.  */
bool
expand_ashiftrt (rtx *operands)
{
  rtx wrk;
  char func[18];
  int value;

  if (TARGET_DYNSHIFT)
    {
      if (!CONST_INT_P (operands[2]))
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
    }
  if (!CONST_INT_P (operands[2]))
    return false;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* If we are called from abs expansion, arrange things so that we
	 can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)
	{
	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
				    operands[1]));
	  emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
	  return true;
	}
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return true;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }
  /* Expand a short sequence inline; for longer ones, call a magic
     routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  function_symbol (wrk, func, SFUNC_STATIC);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return true;
}

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
			   (match_operand:SI 2 "const_int_operand" "n"))
		(match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left / right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1].  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (CONST_INT_P (mask_rtx))
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2  */
  if (! mask2)
    best_cost = ashl_lshr_seq[right].insn_count
		+ ashl_lshr_seq[right + left].insn_count;
  /* mask has no trailing zeroes <==> ! right  */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = ashl_lshr_seq[left + late_right].insn_count
		  + ashl_lshr_seq[late_right].insn_count;
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
	{
	  /* Can we zero-extend right away?  */
	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
	    {
	      cost = 1 + ext_ashl_lshr_seq[right].insn_count
		     + ext_ashl_lshr_seq[left + right].insn_count;
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = -1;
		}
	      continue;
	    }
	  /* ??? Could try to put zero extend into initial right shift,
	     or even shift a bit left before the right shift.  */
	  /* Determine value of first part of left shift, to get to the
	     zero extend cut-off point.  */
	  first = width - exact_log2 (lsb2) + right;
	  if (first >= 0 && right + left - first >= 0)
	    {
	      cost = ext_ashl_lshr_seq[right].insn_count
		     + ext_ashl_lshr_seq[first].insn_count + 1
		     + ext_ashl_lshr_seq[right + left - first].insn_count;

	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = first;
		}
	    }
	}
    }
  /* Try to use the r0 AND pattern.  */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      if (! CONST_OK_FOR_K08 (mask >> i))
	continue;
      cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 2;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1;
	}
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
	     + (can_ext
		? ext_ashl_lshr_seq
		: ashl_lshr_seq)[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 4 - can_ext;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
	}
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}

/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (rtx insn)
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}

/* This is used in the length attribute of the and_shl_scratch
   instruction.  */
int
shl_and_scr_length (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
  rtx op = XEXP (set_src, 0);
  len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
}

/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned a nonzero value.  */
bool
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return true;
    case 1:
      {
	int first = attributes[2];
	rtx operands[3];

	if (first < 0)
	  {
	    emit_insn ((mask << right) <= 0xff
		       ? gen_zero_extendqisi2 (dest,
					       gen_lowpart (QImode, source))
		       : gen_zero_extendhisi2 (dest,
					       gen_lowpart (HImode, source)));
	    source = dest;
	  }
	if (source != dest)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (right)
	  {
	    operands[2] = GEN_INT (right);
	    gen_shifty_hi_op (LSHIFTRT, operands);
	  }
	if (first > 0)
	  {
	    operands[2] = GEN_INT (first);
	    gen_shifty_hi_op (ASHIFT, operands);
	    total_shift -= first;
	    mask <<= first;
	  }
	if (first >= 0)
	  emit_insn (mask <= 0xff
		     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
		     : gen_zero_extendhisi2 (dest,
					     gen_lowpart (HImode, dest)));
	if (total_shift > 0)
	  {
	    operands[2] = GEN_INT (total_shift);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
    case 2:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (currently_expanding_to_rtl
	  || reload_in_progress || reload_completed)
	{
	  rtx operands[3];

	  /* Cases 3 and 4 should be handled by this split
	     only while combining.  */
	  gcc_assert (kind <= 2);
	  if (right)
	    {
	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
	      source = dest;
	    }
	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
	  if (total_shift)
	    {
	      operands[0] = dest;
	      operands[1] = dest;
	      operands[2] = GEN_INT (total_shift);
	      shift_gen_fun (ASHIFT, operands);
	    }
	  break;
	}
      else
	{
	  int neg = 0;
	  if (kind != 4 && total_shift < 16)
	    {
	      neg = -ext_ashl_lshr_seq[total_shift].amount[1];
	      if (neg > 0)
		neg -= ext_ashl_lshr_seq[total_shift].amount[2];
	      else
		neg = 0;
	    }
	  emit_insn (gen_and_shl_scratch (dest, source,
					  GEN_INT (right),
					  GEN_INT (mask),
					  GEN_INT (total_shift + neg),
					  GEN_INT (neg)));
	  emit_insn (gen_movsi (dest, dest));
	  break;
	}
    }
  return false;
}

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
	(sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
				    (match_operand:SI 2 "const_int_operand" "n")
			 (match_operand:SI 3 "const_int_operand" "n")
			 (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift.
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
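/* Kinds 1 and 3 use an 8 bit sign extend and kinds 2 and 4 a 16 bit one;
   see gen_shl_sext below, where ext = kind & 1 ? 8 : 16.  */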
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  gcc_assert (insize > 0);
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = ashl_lshr_seq[32 - insize].insn_count
	      + ashl_lshr_seq[32 - size].insn_count;
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = ashl_lshr_seq[16 - insize].insn_count + 1
	     + ashl_lshr_seq[16 - size].insn_count;
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
	 below, by alternative 3 or something even better.  */
      if (cost < best_cost)
	{
	  kind = 5;
	  best_cost = cost;
	}
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
	{
	  cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
		 + ashl_lshr_seq[size - ext].insn_count;
	  if (cost < best_cost)
	    {
	      kind = ext / (unsigned) 8;
	      best_cost = cost;
	    }
	}
      /* Check if we can do a sloppy shift with a final signed shift
	 restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
	cost = ext_ashl_lshr_seq[ext - insize].insn_count
	       + ext_ashl_lshr_seq[size - ext].insn_count + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
	 and do a final sign extend?  */
      else if (size <= 16)
	cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
	       + ext_ashl_lshr_seq[size > ext
				   ? size - ext : ext - size].insn_count
	       + 1;
      else
	continue;
      if (cost < best_cost)
	{
	  kind = ext / (unsigned) 8 + 2;
	  best_cost = cost;
	}
    }
  /* Check if we can sign extend in r0.  */
  if (insize < 8)
    {
      cost = 3 + ashl_lshr_seq[left].insn_count;
      if (cost < best_cost)
	{
	  kind = 6;
	  best_cost = cost;
	}
      /* Try the same with a final signed shift.  */
      if (left < 31)
	{
	  cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
	  if (cost < best_cost)
	    {
	      kind = 7;
	      best_cost = cost;
	    }
	}
    }
  if (TARGET_DYNSHIFT)
    {
      /* Try to use a dynamic shift.  */
      cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
	{
	  kind = 0;
	  best_cost = cost;
	}
    }
  if (costp)
    *costp = cost;
  return kind;
}

/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */
int
shl_sext_length (rtx insn)
{
  rtx set_src, left_rtx, size_rtx;
  int cost;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
  return cost;
}

/* Generate rtl for this pattern.  */
bool
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
	int ext = kind & 1 ? 8 : 16;
	int shift2 = size - ext;

	/* Don't expand fine-grained when combining, because that will
	   make the pattern fail.  */
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  {
	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	    emit_insn (gen_movsi (dest, source));
	    break;
	  }
	if (dest != source)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (ext - insize)
	  {
	    operands[2] = GEN_INT (ext - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	emit_insn (kind & 1
		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (kind <= 2)
	  {
	    if (shift2)
	      {
		operands[2] = GEN_INT (shift2);
		gen_shifty_op (ASHIFT, operands);
	      }
	  }
	else
	  {
	    if (shift2 > 0)
	      {
		if (EXT_SHIFT_SIGNED (shift2))
		  {
		    operands[2] = GEN_INT (shift2 + 1);
		    gen_shifty_op (ASHIFT, operands);
		    operands[2] = const1_rtx;
		    gen_shifty_op (ASHIFTRT, operands);
		    break;
		  }
		operands[2] = GEN_INT (shift2);
		gen_shifty_hi_op (ASHIFT, operands);
	      }
	    else if (shift2)
	      {
		operands[2] = GEN_INT (-shift2);
		gen_shifty_hi_op (LSHIFTRT, operands);
	      }
	    emit_insn (size <= 8
		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	break;
      }
    case 5:
      {
	int i = 16 - size;
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	else
	  {
	    operands[0] = dest;
	    operands[2] = GEN_INT (16 - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
	while (--i >= 0)
	  gen_ashift (ASHIFTRT, 1, dest);
	break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (! currently_expanding_to_rtl
	  && ! reload_in_progress && ! reload_completed)
	{
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	  emit_insn (gen_movsi (dest, source));
	  break;
	}
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
	emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return true;
    }
  return false;
}

/* Prefix a symbol_ref name with "datalabel".  */
rtx
gen_datalabel_ref (rtx sym)
{
  const char *str;

  if (GET_CODE (sym) == LABEL_REF)
    return gen_rtx_CONST (GET_MODE (sym),
			  gen_rtx_UNSPEC (GET_MODE (sym),
					  gen_rtvec (1, sym),
					  UNSPEC_DATALABEL));

  gcc_assert (GET_CODE (sym) == SYMBOL_REF);

  str = XSTR (sym, 0);
  /* Share all SYMBOL_REF strings with the same value - that is important
     for cse.  */
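  /* get_identifier interns the string in the identifier hash table, so
     equal names end up as pointer-equal strings.  */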
  str = IDENTIFIER_POINTER (get_identifier (str));
  XSTR (sym, 0) = str;

  return sym;
}


static alloc_pool label_ref_list_pool;

typedef struct label_ref_list_d
{
  rtx_code_label *label;
  struct label_ref_list_d *next;
} *label_ref_list_t;

/* The SH cannot load a large constant into a register; constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make things bigger.

   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find the
   last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the find and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */

typedef struct
{
  rtx value;			/* Value in table.  */
  rtx_code_label *label;	/* Label of value.  */
  label_ref_list_t wend;	/* End of window.  */
  machine_mode mode;		/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   constants in the range 0..510 are at least 2 bytes long, and in the
   range from there to 1018 at least 4 bytes.  */

#define MAX_POOL_SIZE 372
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
static rtx_code_label *pool_window_label;
static int pool_window_last;

static int max_labelno_before_reorg;

/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   it?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */

/* Add a constant to the pool and return its label.  */
static rtx_code_label *
add_constant (rtx x, machine_mode mode, rtx last_value)
{
  int i;
  rtx_code_label *lab, *new_rtx;
  label_ref_list_t ref, newref;

  /* First see if we've already got it.  */
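  /* An entry is reused only if both the rtx value and the mode match;
     CODE_LABELs additionally have to agree in their XINT (x, 3) field.  */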
  for (i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
	  && mode == pool_vector[i].mode)
	{
	  if (x->code == CODE_LABEL)
	    {
	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
		continue;
	    }
	  if (rtx_equal_p (x, pool_vector[i].value))
	    {
	      lab = new_rtx = 0;
	      if (! last_value
		  || ! i
		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
		{
		  new_rtx = gen_label_rtx ();
		  LABEL_REFS (new_rtx) = pool_vector[i].label;
		  pool_vector[i].label = lab = new_rtx;
		}
	      if (lab && pool_window_label)
		{
		  newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
		  newref->label = pool_window_label;
		  ref = pool_vector[pool_window_last].wend;
		  newref->next = ref;
		  pool_vector[pool_window_last].wend = newref;
		}
	      if (new_rtx)
		pool_window_label = new_rtx;
	      pool_window_last = i;
	      return lab;
	    }
	}
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
      newref->label = pool_window_label;
      ref = pool_vector[pool_window_last].wend;
      newref->next = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}

/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
   these insns at a 4-byte aligned position.  BARRIER is the barrier
   after which we are to place the table.  */
static void
dump_table (rtx_insn *start, rtx_insn *barrier)
{
  rtx_insn *scan = barrier;
  int i;
  bool need_align = true;
  rtx lab;
  label_ref_list_t ref;
  bool have_df = false;

  /* Do two passes, first time dump out the HI sized constants.  */
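  /* HImode entries go out first because mov.w has the shorter (510 byte)
     pc-relative range; the SI/SF and DI/DF entries follow with their
     stricter alignment.  */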

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
	{
	  if (need_align)
	    {
	      scan = emit_insn_after (gen_align_2 (), scan);
	      need_align = false;
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
				  scan);
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
      else if (p->mode == DFmode)
	have_df = true;
    }

  need_align = true;

  if (start)
    {
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = false;
      for (; start != barrier; start = NEXT_INSN (start))
	if (NONJUMP_INSN_P (start)
	    && recog_memoized (start) == CODE_FOR_casesi_worker_2)
	  {
	    rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
	    rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

	    scan = emit_label_after (lab, scan);
	  }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      rtx_insn *align_insn = NULL;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = false;

      for (i = 0; i < pool_size; i++)
	{
	  pool_node *p = &pool_vector[i];

	  switch (p->mode)
	    {
	    case HImode:
	      break;
	    case SImode:
	    case SFmode:
	      if (align_insn && !p->part_of_sequence_p)
		{
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    emit_label_before (lab, align_insn);
		  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
				    align_insn);
		  for (ref = p->wend; ref; ref = ref->next)
		    {
		      lab = ref->label;
		      emit_insn_before (gen_consttable_window_end (lab),
					align_insn);
		    }
		  delete_insn (align_insn);
		  align_insn = NULL;
		  continue;
		}
	      else
		{
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    scan = emit_label_after (lab, scan);
		  scan = emit_insn_after (gen_consttable_4 (p->value,
							    const0_rtx), scan);
		  need_align = ! need_align;
		}
	      break;
	    case DFmode:
	      if (need_align)
		{
		  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
		  align_insn = scan;
		  need_align = false;
		}
	    case DImode:
	      for (lab = p->label; lab; lab = LABEL_REFS (lab))
		scan = emit_label_after (lab, scan);
	      scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				      scan);
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  if (p->mode != HImode)
	    {
	      for (ref = p->wend; ref; ref = ref->next)
		{
		  lab = ref->label;
		  scan = emit_insn_after (gen_consttable_window_end (lab),
					  scan);
		}
	    }
	}

      pool_size = 0;
    }

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
	{
	case HImode:
	  break;
	case SImode:
	case SFmode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
				  scan);
	  break;
	case DFmode:
	case DImode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				  scan);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (p->mode != HImode)
	{
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  pool_size = 0;
  pool_window_label = NULL;
  pool_window_last = 0;
}

#define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)

/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For a DImode/DFmode move, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */
static bool
broken_move (rtx_insn *insn)
{
  if (NONJUMP_INSN_P (insn))
    {
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
	  /* We can load any 8-bit value if we don't care what the high
	     order bits end up as.  */
	  && GET_MODE (SET_DEST (pat)) != QImode
	  && (CONSTANT_P (SET_SRC (pat))
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
		  && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
	      /* Match mova_const.  */
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC
		  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
		  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
	  && ! (TARGET_SH2E
		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
		&& (fp_zero_operand (SET_SRC (pat))
		    || fp_one_operand (SET_SRC (pat)))
		/* In general we don't know the current setting of fpscr, so
		   disable fldi.
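		   (fldi0 and fldi1 load the constants 0.0 and 1.0; whether
		   they are usable here depends on that setting.)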
		   There is an exception if this was a register-register move
		   before reload - and hence it was ascertained that we have
		   single precision setting - and in a post-reload optimization
		   we changed this to do a constant load.  In that case
		   we don't have an r0 clobber, hence we must use fldi.  */
		&& (TARGET_FMOVD
		    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
			== SCRATCH))
		&& REG_P (SET_DEST (pat))
		&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
	  && ! (TARGET_SH2A
		&& GET_MODE (SET_DEST (pat)) == SImode
		&& (satisfies_constraint_I20 (SET_SRC (pat))
		    || satisfies_constraint_I28 (SET_SRC (pat))))
	  && ! satisfies_constraint_I08 (SET_SRC (pat)))
	return true;
    }

  return false;
}

/* Return true if the specified insn is a mova insn.  */
static bool
mova_p (rtx_insn *insn)
{
  return (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
	  /* Don't match mova_const.  */
	  && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
}

/* Fix up a mova from a switch that went out of range.  */
static void
fixup_mova (rtx_insn *mova)
{
  PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
  if (! flag_pic)
    {
      SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
      INSN_CODE (mova) = -1;
    }
  else
    {
      rtx_insn *worker = mova;
      rtx_code_label *lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, target, base, diff;

      do
	{
	  worker = NEXT_INSN (worker);
	  gcc_assert (worker
		      && !LABEL_P (worker)
		      && !JUMP_P (worker));
	} while (NOTE_P (worker)
		 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      PATTERN (worker) = (gen_casesi_worker_2
			  (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
			   XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
			   XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      base = gen_rtx_LABEL_REF (Pmode, lab);
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}

/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
   *num_mova, and check if the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
static int
untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
{
  int n_addr = 0;	/* Initialization to shut up spurious warning.  */
  int f_target, n_target = 0;	/* Likewise.  */

  if (optimize)
    {
      /* If NEW_MOVA has no address yet, it will be handled later.  */
      if (INSN_ADDRESSES_SIZE () <= (unsigned) INSN_UID (new_mova))
	return -1;

      n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
      n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
      if (n_addr > n_target || n_addr + 1022 < n_target)
	{
	  /* Change the mova into a load.
	     broken_move will then return true for it.  */
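	  /* The value is then rematerialized from the constant pool like
	     any other out-of-range constant.  */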
*/ 5162 fixup_mova (new_mova); 5163 return 1; 5164 } 5165 } 5166 if (!(*num_mova)++) 5167 { 5168 *first_mova = new_mova; 5169 return 2; 5170 } 5171 if (!optimize 5172 || ((f_target 5173 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0)))) 5174 >= n_target)) 5175 return -1; 5176 5177 (*num_mova)--; 5178 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova)) 5179 > n_target - n_addr) 5180 { 5181 fixup_mova (*first_mova); 5182 return 0; 5183 } 5184 else 5185 { 5186 fixup_mova (new_mova); 5187 return 1; 5188 } 5189} 5190 5191/* Find the last barrier from insn FROM which is close enough to hold the 5192 constant pool. If we can't find one, then create one near the end of 5193 the range. */ 5194static rtx_insn * 5195find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from) 5196{ 5197 int count_si = 0; 5198 int count_hi = 0; 5199 int found_hi = 0; 5200 int found_si = 0; 5201 int found_di = 0; 5202 int hi_align = 2; 5203 int si_align = 2; 5204 int leading_mova = num_mova; 5205 rtx_insn *barrier_before_mova = NULL; 5206 rtx_insn *found_barrier = NULL; 5207 rtx_insn *good_barrier = NULL; 5208 int si_limit; 5209 int hi_limit; 5210 rtx_insn *orig = from; 5211 rtx_insn *last_got = NULL; 5212 rtx_insn *last_symoff = NULL; 5213 5214 /* For HImode: range is 510, add 4 because pc counts from address of 5215 second instruction after this one, subtract 2 for the jump instruction 5216 that we may need to emit before the table, subtract 2 for the instruction 5217 that fills the jump delay slot (in very rare cases, reorg will take an 5218 instruction from after the constant pool or will leave the delay slot 5219 empty). This gives 510. 5220 For SImode: range is 1020, add 4 because pc counts from address of 5221 second instruction after this one, subtract 2 in case pc is 2 byte 5222 aligned, subtract 2 for the jump instruction that we may need to emit 5223 before the table, subtract 2 for the instruction that fills the jump 5224 delay slot. This gives 1018. */ 5225 5226 /* The branch will always be shortened now that the reference address for 5227 forward branches is the successor address, thus we need no longer make 5228 adjustments to the [sh]i_limit for -O0. */ 5229 5230 si_limit = 1018; 5231 hi_limit = 510; 5232 5233 while (from && count_si < si_limit && count_hi < hi_limit) 5234 { 5235 int inc = get_attr_length (from); 5236 int new_align = 1; 5237 5238 /* If this is a label that existed at the time of the compute_alignments 5239 call, determine the alignment. N.B. When find_barrier recurses for 5240 an out-of-reach mova, we might see labels at the start of previously 5241 inserted constant tables. */ 5242 if (LABEL_P (from) 5243 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg) 5244 { 5245 if (optimize) 5246 new_align = 1 << label_to_alignment (from); 5247 else if (BARRIER_P (prev_nonnote_insn (from))) 5248 new_align = 1 << barrier_align (from); 5249 else 5250 new_align = 1; 5251 inc = 0; 5252 } 5253 /* In case we are scanning a constant table because of recursion, check 5254 for explicit alignments. If the table is long, we might be forced 5255 to emit the new table in front of it; the length of the alignment 5256 might be the last straw. */ 5257 else if (NONJUMP_INSN_P (from) 5258 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE 5259 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN) 5260 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0)); 5261 /* When we find the end of a constant table, paste the new constant 5262 at the end. 
That is better than putting it in front because 5263 this way, we don't need extra alignment for adding a 4-byte-aligned 5264 mov(a) label to a 2/4 or 8/4 byte aligned table. */ 5265 else if (NONJUMP_INSN_P (from) 5266 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE 5267 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END) 5268 return from; 5269 5270 if (BARRIER_P (from)) 5271 { 5272 rtx_insn *next; 5273 5274 found_barrier = from; 5275 5276 /* If we are at the end of the function, or in front of an alignment 5277 instruction, we need not insert an extra alignment. We prefer 5278 this kind of barrier. */ 5279 if (barrier_align (from) > 2) 5280 good_barrier = from; 5281 5282 /* If we are at the end of a hot/cold block, dump the constants 5283 here. */ 5284 next = NEXT_INSN (from); 5285 if (next 5286 && NOTE_P (next) 5287 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS) 5288 break; 5289 } 5290 5291 if (broken_move (from)) 5292 { 5293 rtx pat, src, dst; 5294 machine_mode mode; 5295 5296 pat = PATTERN (from); 5297 if (GET_CODE (pat) == PARALLEL) 5298 pat = XVECEXP (pat, 0, 0); 5299 src = SET_SRC (pat); 5300 dst = SET_DEST (pat); 5301 mode = GET_MODE (dst); 5302 5303 /* GOT pcrelat setting comes in pair of 5304 mova .L8,r0 5305 mov.l .L8,r12 5306 instructions. (plus add r0,r12). 5307 Remember if we see one without the other. */ 5308 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0))) 5309 last_got = last_got ? NULL : from; 5310 else if (PIC_ADDR_P (src)) 5311 last_got = last_got ? NULL : from; 5312 5313 /* We must explicitly check the mode, because sometimes the 5314 front end will generate code to load unsigned constants into 5315 HImode targets without properly sign extending them. */ 5316 if (mode == HImode 5317 || (mode == SImode && satisfies_constraint_I16 (src) 5318 && REGNO (dst) != FPUL_REG)) 5319 { 5320 found_hi += 2; 5321 /* We put the short constants before the long constants, so 5322 we must count the length of short constants in the range 5323 for the long constants. */ 5324 /* ??? This isn't optimal, but is easy to do. */ 5325 si_limit -= 2; 5326 } 5327 else 5328 { 5329 /* We dump DF/DI constants before SF/SI ones, because 5330 the limit is the same, but the alignment requirements 5331 are higher. We may waste up to 4 additional bytes 5332 for alignment, and the DF/DI constant may have 5333 another SF/SI constant placed before it. */ 5334 if (TARGET_SHCOMPACT 5335 && ! found_di 5336 && (mode == DFmode || mode == DImode)) 5337 { 5338 found_di = 1; 5339 si_limit -= 8; 5340 } 5341 while (si_align > 2 && found_si + si_align - 2 > count_si) 5342 si_align >>= 1; 5343 if (found_si > count_si) 5344 count_si = found_si; 5345 found_si += GET_MODE_SIZE (mode); 5346 if (num_mova) 5347 si_limit -= GET_MODE_SIZE (mode); 5348 } 5349 } 5350 5351 if (mova_p (from)) 5352 { 5353 switch (untangle_mova (&num_mova, &mova, from)) 5354 { 5355 case 1: 5356 if (flag_pic) 5357 { 5358 rtx src = SET_SRC (PATTERN (from)); 5359 if (GET_CODE (src) == CONST 5360 && GET_CODE (XEXP (src, 0)) == UNSPEC 5361 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF) 5362 last_symoff = from; 5363 } 5364 break; 5365 case 0: return find_barrier (0, 0, mova); 5366 case 2: 5367 { 5368 leading_mova = 0; 5369 barrier_before_mova 5370 = good_barrier ? 
good_barrier : found_barrier; 5371 } 5372 default: break; 5373 } 5374 if (found_si > count_si) 5375 count_si = found_si; 5376 } 5377 else if (JUMP_TABLE_DATA_P (from) 5378 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC) 5379 { 5380 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode) 5381 || (num_mova 5382 && (prev_nonnote_insn (from) 5383 == XEXP (MOVA_LABELREF (mova), 0)))) 5384 num_mova--; 5385 if (barrier_align (next_real_insn (from)) == align_jumps_log) 5386 { 5387 /* We have just passed the barrier in front of the 5388 ADDR_DIFF_VEC, which is stored in found_barrier. Since 5389 the ADDR_DIFF_VEC is accessed as data, just like our pool 5390 constants, this is a good opportunity to accommodate what 5391 we have gathered so far. 5392 If we waited any longer, we could end up at a barrier in 5393 front of code, which gives worse cache usage for separated 5394 instruction / data caches. */ 5395 good_barrier = found_barrier; 5396 break; 5397 } 5398 else 5399 { 5400 rtx body = PATTERN (from); 5401 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); 5402 } 5403 } 5404 /* For the SH1, we generate alignments even after jumps-around-jumps. */ 5405 else if (JUMP_P (from) 5406 && ! TARGET_SH2 5407 && ! optimize_size) 5408 new_align = 4; 5409 5410 /* There is a possibility that a bf is transformed into a bf/s by the 5411 delay slot scheduler. */ 5412 if (JUMP_P (from) 5413 && get_attr_type (from) == TYPE_CBRANCH 5414 && ! sequence_insn_p (from)) 5415 inc += 2; 5416 5417 if (found_si) 5418 { 5419 count_si += inc; 5420 if (new_align > si_align) 5421 { 5422 si_limit -= (count_si - 1) & (new_align - si_align); 5423 si_align = new_align; 5424 } 5425 count_si = (count_si + new_align - 1) & -new_align; 5426 } 5427 if (found_hi) 5428 { 5429 count_hi += inc; 5430 if (new_align > hi_align) 5431 { 5432 hi_limit -= (count_hi - 1) & (new_align - hi_align); 5433 hi_align = new_align; 5434 } 5435 count_hi = (count_hi + new_align - 1) & -new_align; 5436 } 5437 from = NEXT_INSN (from); 5438 } 5439 5440 if (num_mova) 5441 { 5442 if (leading_mova) 5443 { 5444 /* Try as we might, the leading mova is out of range. Change 5445 it into a load (which will become a pcload) and retry. */ 5446 fixup_mova (mova); 5447 return find_barrier (0, 0, mova); 5448 } 5449 else 5450 { 5451 /* Insert the constant pool table before the mova instruction, 5452 to prevent the mova label reference from going out of range. */ 5453 from = mova; 5454 good_barrier = found_barrier = barrier_before_mova; 5455 } 5456 } 5457 5458 if (found_barrier) 5459 { 5460 if (good_barrier && next_real_insn (found_barrier)) 5461 found_barrier = good_barrier; 5462 } 5463 else 5464 { 5465 /* We didn't find a barrier in time to dump our stuff, 5466 so we'll make one. */ 5467 rtx_code_label *label = gen_label_rtx (); 5468 5469 /* Don't emit a constant table in the middle of insns for 5470 casesi_worker_2. This is a bit overkill but is enough 5471 because casesi_worker_2 wouldn't appear so frequently. */ 5472 if (last_symoff) 5473 from = last_symoff; 5474 5475 /* If we exceeded the range, then we must back up over the last 5476 instruction we looked at. Otherwise, we just need to undo the 5477 NEXT_INSN at the end of the loop. 
  */
      if (PREV_INSN (from) != orig
          && (count_hi > hi_limit || count_si > si_limit))
        from = PREV_INSN (PREV_INSN (from));
      else
        from = PREV_INSN (from);

      /* Don't emit a constant table in the middle of global pointer setting,
         since that would move the addressing base GOT into another table.
         We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
         in the pool anyway, so just move up the whole constant pool.

         However, avoid doing so when the last single GOT mov is the starting
         insn itself.  Going above the start insn would create a negative
         offset, causing errors.  */
      if (last_got && last_got != orig)
        from = PREV_INSN (last_got);

      /* Don't insert the constant pool table at the position which
         may be the landing pad.  */
      if (flag_exceptions
          && CALL_P (from)
          && find_reg_note (from, REG_EH_REGION, NULL_RTX))
        from = PREV_INSN (from);

      /* Walk back to be just before any jump or label.
         Putting it before a label reduces the number of times the branch
         around the constant pool table will be hit.  Putting it before
         a jump makes it more likely that the bra delay slot will be
         filled.  */
      while (NOTE_P (from) || JUMP_P (from)
             || LABEL_P (from))
        from = PREV_INSN (from);

      /* Make sure we do not split between a call and its corresponding
         CALL_ARG_LOCATION note.  */
      if (CALL_P (from))
        {
          rtx_insn *next = NEXT_INSN (from);
          if (next && NOTE_P (next)
              && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
            from = next;
        }

      from = emit_jump_insn_after (gen_jump (label), from);
      JUMP_LABEL (from) = label;
      LABEL_NUSES (label) = 1;
      found_barrier = emit_barrier_after (from);
      emit_label_after (label, found_barrier);
    }

  return found_barrier;
}

/* If the instruction INSN is implemented by a special function, and we can
   positively find the register that is used to call the sfunc, and this
   register is not used anywhere else in this instruction, except possibly
   as the destination of a set, return this register; else return NULL_RTX.  */
rtx
sfunc_uses_reg (rtx_insn *insn)
{
  int i;
  rtx pattern, part, reg_part, reg;

  if (!NONJUMP_INSN_P (insn))
    return NULL_RTX;
  pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
    return NULL_RTX;

  for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
        reg_part = part;
    }
  if (! reg_part)
    return NULL_RTX;
  reg = XEXP (reg_part, 0);
  for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
    {
      part = XVECEXP (pattern, 0, i);
      if (part == reg_part || GET_CODE (part) == CLOBBER)
        continue;
      if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
                                  && REG_P (SET_DEST (part)))
                                 ? SET_SRC (part) : part)))
        return NULL_RTX;
    }
  return reg;
}

/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.
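   In other words, the result is true iff INSN uses REG in some way other
   than as the address of a call (for illustration: as a source operand of
   an ordinary arithmetic insn).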
*/ 5572static bool 5573noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set) 5574{ 5575 rtx pattern, reg2; 5576 5577 *set = NULL_RTX; 5578 5579 reg2 = sfunc_uses_reg (insn); 5580 if (reg2 && REGNO (reg2) == REGNO (reg)) 5581 { 5582 pattern = single_set (insn); 5583 if (pattern 5584 && REG_P (SET_DEST (pattern)) 5585 && REGNO (reg) == REGNO (SET_DEST (pattern))) 5586 *set = pattern; 5587 return false; 5588 } 5589 if (!CALL_P (insn)) 5590 { 5591 /* We don't use rtx_equal_p because we don't care if the mode is 5592 different. */ 5593 pattern = single_set (insn); 5594 if (pattern 5595 && REG_P (SET_DEST (pattern)) 5596 && REGNO (reg) == REGNO (SET_DEST (pattern))) 5597 { 5598 rtx par, part; 5599 int i; 5600 5601 *set = pattern; 5602 par = PATTERN (insn); 5603 if (GET_CODE (par) == PARALLEL) 5604 for (i = XVECLEN (par, 0) - 1; i >= 0; i--) 5605 { 5606 part = XVECEXP (par, 0, i); 5607 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) 5608 return true; 5609 } 5610 return reg_mentioned_p (reg, SET_SRC (pattern)); 5611 } 5612 5613 return true; 5614 } 5615 5616 pattern = PATTERN (insn); 5617 5618 if (GET_CODE (pattern) == PARALLEL) 5619 { 5620 int i; 5621 5622 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) 5623 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) 5624 return true; 5625 pattern = XVECEXP (pattern, 0, 0); 5626 } 5627 5628 if (GET_CODE (pattern) == SET) 5629 { 5630 if (reg_mentioned_p (reg, SET_DEST (pattern))) 5631 { 5632 /* We don't use rtx_equal_p, because we don't care if the 5633 mode is different. */ 5634 if (!REG_P (SET_DEST (pattern)) 5635 || REGNO (reg) != REGNO (SET_DEST (pattern))) 5636 return true; 5637 5638 *set = pattern; 5639 } 5640 5641 pattern = SET_SRC (pattern); 5642 } 5643 5644 if (GET_CODE (pattern) != CALL 5645 || !MEM_P (XEXP (pattern, 0)) 5646 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) 5647 return true; 5648 5649 return false; 5650} 5651 5652/* Given a X, a pattern of an insn or a part of it, return a mask of used 5653 general registers. Bits 0..15 mean that the respective registers 5654 are used as inputs in the instruction. Bits 16..31 mean that the 5655 registers 0..15, respectively, are used as outputs, or are clobbered. 5656 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ 5657int 5658regs_used (rtx x, int is_dest) 5659{ 5660 enum rtx_code code; 5661 const char *fmt; 5662 int i, used = 0; 5663 5664 if (! x) 5665 return used; 5666 code = GET_CODE (x); 5667 switch (code) 5668 { 5669 case REG: 5670 if (REGNO (x) < 16) 5671 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) 5672 << (REGNO (x) + is_dest)); 5673 return 0; 5674 case SUBREG: 5675 { 5676 rtx y = SUBREG_REG (x); 5677 5678 if (!REG_P (y)) 5679 break; 5680 if (REGNO (y) < 16) 5681 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) 5682 << (REGNO (y) + 5683 subreg_regno_offset (REGNO (y), 5684 GET_MODE (y), 5685 SUBREG_BYTE (x), 5686 GET_MODE (x)) + is_dest)); 5687 return 0; 5688 } 5689 case SET: 5690 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); 5691 case RETURN: 5692 /* If there was a return value, it must have been indicated with USE. 
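         Under the bit layout described above, the 0x00ffff00 mask returned
         below marks r8..r15 (the call-saved registers) as inputs and
         r0..r7 as set or clobbered.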
*/ 5693 return 0x00ffff00; 5694 case CLOBBER: 5695 is_dest = 1; 5696 break; 5697 case MEM: 5698 is_dest = 0; 5699 break; 5700 case CALL: 5701 used |= 0x00ff00f0; 5702 break; 5703 default: 5704 break; 5705 } 5706 5707 fmt = GET_RTX_FORMAT (code); 5708 5709 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 5710 { 5711 if (fmt[i] == 'E') 5712 { 5713 int j; 5714 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 5715 used |= regs_used (XVECEXP (x, i, j), is_dest); 5716 } 5717 else if (fmt[i] == 'e') 5718 used |= regs_used (XEXP (x, i), is_dest); 5719 } 5720 return used; 5721} 5722 5723/* Create an instruction that prevents redirection of a conditional branch 5724 to the destination of the JUMP with address ADDR. 5725 If the branch needs to be implemented as an indirect jump, try to find 5726 a scratch register for it. 5727 If NEED_BLOCK is 0, don't do anything unless we need a scratch register. 5728 If any preceding insn that doesn't fit into a delay slot is good enough, 5729 pass 1. Pass 2 if a definite blocking insn is needed. 5730 -1 is used internally to avoid deep recursion. 5731 If a blocking instruction is made or recognized, return it. */ 5732static rtx_insn * 5733gen_block_redirect (rtx_insn *jump, int addr, int need_block) 5734{ 5735 int dead = 0; 5736 rtx_insn *prev = prev_nonnote_insn (jump); 5737 rtx dest; 5738 5739 /* First, check if we already have an instruction that satisfies our need. */ 5740 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ()) 5741 { 5742 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) 5743 return prev; 5744 if (GET_CODE (PATTERN (prev)) == USE 5745 || GET_CODE (PATTERN (prev)) == CLOBBER 5746 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 5747 prev = jump; 5748 else if ((need_block &= ~1) < 0) 5749 return prev; 5750 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) 5751 need_block = 0; 5752 } 5753 if (GET_CODE (PATTERN (jump)) == RETURN) 5754 { 5755 if (! need_block) 5756 return prev; 5757 /* Reorg even does nasty things with return insns that cause branches 5758 to go out of range - see find_end_label and callers. */ 5759 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump); 5760 } 5761 /* We can't use JUMP_LABEL here because it might be undefined 5762 when not optimizing. */ 5763 dest = XEXP (SET_SRC (PATTERN (jump)), 0); 5764 /* If the branch is out of range, try to find a scratch register for it. */ 5765 if (optimize 5766 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 5767 > 4092 + 4098)) 5768 { 5769 rtx_insn *scan; 5770 /* Don't look for the stack pointer as a scratch register, 5771 it would cause trouble if an interrupt occurred. */ 5772 unsigned attempt = 0x7fff, used; 5773 int jump_left = flag_expensive_optimizations + 1; 5774 5775 /* It is likely that the most recent eligible instruction is wanted for 5776 the delay slot. Therefore, find out which registers it uses, and 5777 try to avoid using them. 
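         The loop below scans backwards for such an instruction and, when it
         finds one, masks the registers it uses out of ATTEMPT.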
*/ 5778 5779 for (scan = jump; (scan = PREV_INSN (scan)); ) 5780 { 5781 enum rtx_code code; 5782 5783 if (scan->deleted ()) 5784 continue; 5785 code = GET_CODE (scan); 5786 if (code == CODE_LABEL || code == JUMP_INSN) 5787 break; 5788 if (code == INSN 5789 && GET_CODE (PATTERN (scan)) != USE 5790 && GET_CODE (PATTERN (scan)) != CLOBBER 5791 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) 5792 { 5793 attempt &= ~regs_used (PATTERN (scan), 0); 5794 break; 5795 } 5796 } 5797 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump); 5798 (scan = NEXT_INSN (scan)); ) 5799 { 5800 enum rtx_code code; 5801 5802 if (scan->deleted ()) 5803 continue; 5804 code = GET_CODE (scan); 5805 if (INSN_P (scan)) 5806 { 5807 used |= regs_used (PATTERN (scan), 0); 5808 if (code == CALL_INSN) 5809 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); 5810 dead |= (used >> 16) & ~used; 5811 if (dead & attempt) 5812 { 5813 dead &= attempt; 5814 break; 5815 } 5816 if (code == JUMP_INSN) 5817 { 5818 if (jump_left-- && simplejump_p (scan)) 5819 scan = JUMP_LABEL_AS_INSN (scan); 5820 else 5821 break; 5822 } 5823 } 5824 } 5825 /* Mask out the stack pointer again, in case it was 5826 the only 'free' register we have found. */ 5827 dead &= 0x7fff; 5828 } 5829 /* If the immediate destination is still in range, check for possible 5830 threading with a jump beyond the delay slot insn. 5831 Don't check if we are called recursively; the jump has been or will be 5832 checked in a different invocation then. */ 5833 5834 else if (optimize && need_block >= 0) 5835 { 5836 rtx_insn *next = next_active_insn (next_active_insn (dest)); 5837 if (next && JUMP_P (next) 5838 && GET_CODE (PATTERN (next)) == SET 5839 && recog_memoized (next) == CODE_FOR_jump_compact) 5840 { 5841 dest = JUMP_LABEL (next); 5842 if (dest 5843 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 5844 > 4092 + 4098)) 5845 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1); 5846 } 5847 } 5848 5849 if (dead) 5850 { 5851 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); 5852 5853 /* It would be nice if we could convert the jump into an indirect 5854 jump / far branch right now, and thus exposing all constituent 5855 instructions to further optimization. However, reorg uses 5856 simplejump_p to determine if there is an unconditional jump where 5857 it should try to schedule instructions from the target of the 5858 branch; simplejump_p fails for indirect jumps even if they have 5859 a JUMP_LABEL. */ 5860 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch 5861 (reg, GEN_INT (unspec_bbr_uid++)), 5862 jump); 5863 /* ??? We would like this to have the scope of the jump, but that 5864 scope will change when a delay slot insn of an inner scope is added. 5865 Hence, after delay slot scheduling, we'll have to expect 5866 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and 5867 the jump. */ 5868 5869 INSN_LOCATION (insn) = INSN_LOCATION (jump); 5870 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; 5871 return insn; 5872 } 5873 else if (need_block) 5874 /* We can't use JUMP_LABEL here because it might be undefined 5875 when not optimizing. */ 5876 return emit_insn_before (gen_block_branch_redirect 5877 (GEN_INT (unspec_bbr_uid++)), 5878 jump); 5879 return prev; 5880} 5881 5882#define CONDJUMP_MIN -252 5883#define CONDJUMP_MAX 262 5884struct far_branch 5885{ 5886 /* A label (to be placed) in front of the jump 5887 that jumps to our ultimate destination. 
  */
  rtx_insn *near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx_insn *insert_place;
  /* The ultimate destination.  */
  rtx_insn *far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};

static void gen_far_branch (struct far_branch *);
enum mdep_reorg_phase_e mdep_reorg_phase;
static void
gen_far_branch (struct far_branch *bp)
{
  rtx_insn *insn = bp->insert_place;
  rtx_insn *jump;
  rtx_code_label *label = gen_label_rtx ();
  int ok;

  emit_label_after (label, insn);
  if (bp->far_label)
    {
      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
      LABEL_NUSES (bp->far_label)++;
    }
  else
    jump = emit_jump_insn_after (gen_return (), insn);

  /* Emit a barrier so that reorg knows that any following instructions
     are not reachable via a fall-through path.
     But don't do this when not optimizing, since we wouldn't suppress the
     alignment for the barrier then, and could end up with out-of-range
     pc-relative loads.  */
  if (optimize)
    emit_barrier_after (jump);
  emit_label_after (bp->near_label, insn);

  if (bp->far_label)
    JUMP_LABEL (jump) = bp->far_label;
  else
    {
      rtx pat = PATTERN (jump);
      gcc_assert (ANY_RETURN_P (pat));
      JUMP_LABEL (jump) = pat;
    }

  ok = invert_jump (insn, label, 1);
  gcc_assert (ok);

  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we'd rather hide the delay slot)
     and it could cause branches to go out of range.  */
  if (bp->far_label)
    (emit_insn_after
     (gen_stuff_delay_slot
      (GEN_INT (unspec_bbr_uid++),
       GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
      insn));
  /* Prevent reorg from undoing our splits.  */
  gen_block_redirect (jump, bp->address += 2, 2);
}

/* Fix up ADDR_DIFF_VECs.  */
void
fixup_addr_diff_vecs (rtx_insn *first)
{
  rtx_insn *insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      rtx vec_lab, pat, prevpat, x, braf_label;
      rtx_insn *prev;

      if (! JUMP_TABLE_DATA_P (insn)
          || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
        continue;
      pat = PATTERN (insn);
      vec_lab = XEXP (XEXP (pat, 0), 0);

      /* Search for the matching casesi_jump_2.  */
      for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
        {
          if (!JUMP_P (prev))
            continue;
          prevpat = PATTERN (prev);
          if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
            continue;
          x = XVECEXP (prevpat, 0, 1);
          if (GET_CODE (x) != USE)
            continue;
          x = XEXP (x, 0);
          if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
            break;
        }
      /* FIXME: This is a bug in the optimizer, but it seems harmless
         to just avoid panicking.  */
      if (!prev)
        continue;

      /* Emit the reference label of the braf where it belongs, right after
         the casesi_jump_2 (i.e. braf).
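         The offsets in the ADDR_DIFF_VEC are computed relative to this
         label, matching the base address the braf adds them to at run
         time.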
  */
      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
      emit_label_after (braf_label, prev);

      /* Fix up the ADDR_DIFF_VEC to be relative
         to the reference address of the braf.  */
      XEXP (XEXP (pat, 0), 0) = braf_label;
    }
}

/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
   a barrier.  Return the base 2 logarithm of the desired alignment.  */
int
barrier_align (rtx_insn *barrier_or_label)
{
  rtx next, pat;

  if (! barrier_or_label)
    return 0;

  if (LABEL_P (barrier_or_label)
      && NEXT_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
    return 2;

  if (BARRIER_P (barrier_or_label)
      && PREV_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
    {
      pat = PATTERN (PREV_INSN (barrier_or_label));
      /* If this is a very small table, we want to keep the alignment after
         the table to the minimum for proper code alignment.  */
      return ((optimize_size
               || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
                   <= (unsigned) 1 << (CACHE_LOG - 2)))
              ? 1 << TARGET_SHMEDIA : align_jumps_log);
    }

  next = next_active_insn (barrier_or_label);

  if (! next)
    return 0;

  pat = PATTERN (next);

  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
    /* This is a barrier in front of a constant table.  */
    return 0;

  if (optimize_size)
    return 0;

  if (! TARGET_SH2 || ! optimize)
    return align_jumps_log;

  /* When fixing up pcloads, a constant table might be inserted just before
     the basic block that ends with the barrier.  Thus, we can't trust the
     instruction lengths before that.  */
  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
    {
      /* Check if there is an immediately preceding branch to the insn beyond
         the barrier.  We must weigh the cost of discarding useful information
         from the current cache line when executing this branch and there is
         an alignment, against that of fetching unneeded insns in front of the
         branch target when there is no alignment.  */

      /* There are two delay_slot cases to consider.  One is the simple case
         where the preceding branch is to the insn beyond the barrier (simple
         delay slot filling), and the other is where the preceding branch has
         a delay slot that is a duplicate of the insn after the barrier
         (fill_eager_delay_slots) and the branch is to the insn after the insn
         after the barrier.  */

      int slot, credit;
      bool jump_to_next = false;

      /* Skip to the insn before the JUMP_INSN before the barrier under
         investigation.  */
      rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));

      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
           credit >= 0 && prev && NONJUMP_INSN_P (prev);
           prev = prev_real_insn (prev))
        {
          jump_to_next = false;
          if (GET_CODE (PATTERN (prev)) == USE
              || GET_CODE (PATTERN (prev)) == CLOBBER)
            continue;
          if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
            {
              prev = prev_seq->insn (1);
              if (INSN_UID (prev) == INSN_UID (next))
                {
                  /* Delay slot was filled with insn at jump target.
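                     This is the fill_eager_delay_slots case described in
                     the comment above; the branch effectively targets the
                     insn after NEXT.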
*/ 6086 jump_to_next = true; 6087 continue; 6088 } 6089 } 6090 6091 if (slot && 6092 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) 6093 slot = 0; 6094 credit -= get_attr_length (prev); 6095 } 6096 if (prev && jump_to_label_p (prev)) 6097 { 6098 rtx_insn *x; 6099 if (jump_to_next 6100 || next_real_insn (JUMP_LABEL (prev)) == next 6101 /* If relax_delay_slots() decides NEXT was redundant 6102 with some previous instruction, it will have 6103 redirected PREV's jump to the following insn. */ 6104 || JUMP_LABEL (prev) == next_nonnote_insn (next) 6105 /* There is no upper bound on redundant instructions 6106 that might have been skipped, but we must not put an 6107 alignment where none had been before. */ 6108 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))), 6109 (INSN_P (x) 6110 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect 6111 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch 6112 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot)))) 6113 { 6114 rtx pat = PATTERN (prev); 6115 if (GET_CODE (pat) == PARALLEL) 6116 pat = XVECEXP (pat, 0, 0); 6117 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0)) 6118 return 0; 6119 } 6120 } 6121 } 6122 6123 return align_jumps_log; 6124} 6125 6126/* If we are inside a phony loop, almost any kind of label can turn up as the 6127 first one in the loop. Aligning a braf label causes incorrect switch 6128 destination addresses; we can detect braf labels because they are 6129 followed by a BARRIER. 6130 Applying loop alignment to small constant or switch tables is a waste 6131 of space, so we suppress this too. */ 6132int 6133sh_loop_align (rtx_insn *label) 6134{ 6135 rtx_insn *next = label; 6136 6137 if (! optimize || optimize_size) 6138 return 0; 6139 6140 do 6141 next = next_nonnote_insn (next); 6142 while (next && LABEL_P (next)); 6143 6144 if (! next 6145 || ! INSN_P (next) 6146 || recog_memoized (next) == CODE_FOR_consttable_2) 6147 return 0; 6148 6149 return align_loops_log; 6150} 6151 6152/* Do a final pass over the function, just before delayed branch 6153 scheduling. */ 6154static void 6155sh_reorg (void) 6156{ 6157 rtx_insn *first, *insn, *mova = NULL; 6158 int num_mova; 6159 rtx r0_rtx = gen_rtx_REG (Pmode, 0); 6160 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); 6161 6162 first = get_insns (); 6163 max_labelno_before_reorg = max_label_num (); 6164 6165 /* We must split call insns before introducing `mova's. If we're 6166 optimizing, they'll have already been split. Otherwise, make 6167 sure we don't split them too late. */ 6168 if (! optimize) 6169 split_all_insns_noflow (); 6170 6171 if (TARGET_SHMEDIA) 6172 return; 6173 6174 /* If relaxing, generate pseudo-ops to associate function calls with 6175 the symbols they call. It does no harm to not generate these 6176 pseudo-ops. However, when we can generate them, it enables the 6177 linker to potentially relax the jsr to a bsr, and eliminate the 6178 register load and, possibly, the constant pool entry. */ 6179 6180 mdep_reorg_phase = SH_INSERT_USES_LABELS; 6181 if (TARGET_RELAX) 6182 { 6183 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our 6184 own purposes. This works because none of the remaining passes 6185 need to look at them. 6186 6187 ??? But it may break in the future. We should use a machine 6188 dependent REG_NOTE, or some other approach entirely. 
*/ 6189 for (insn = first; insn; insn = NEXT_INSN (insn)) 6190 { 6191 if (INSN_P (insn)) 6192 { 6193 rtx note; 6194 6195 while ((note = find_reg_note (insn, REG_LABEL_OPERAND, 6196 NULL_RTX)) != 0) 6197 remove_note (insn, note); 6198 } 6199 } 6200 6201 for (insn = first; insn; insn = NEXT_INSN (insn)) 6202 { 6203 rtx pattern, reg, set, dies; 6204 rtx_code_label *label; 6205 rtx_insn *link, *scan; 6206 int rescan = 0, foundinsn = 0; 6207 6208 if (CALL_P (insn)) 6209 { 6210 pattern = PATTERN (insn); 6211 6212 if (GET_CODE (pattern) == PARALLEL) 6213 pattern = XVECEXP (pattern, 0, 0); 6214 if (GET_CODE (pattern) == SET) 6215 pattern = SET_SRC (pattern); 6216 6217 if (GET_CODE (pattern) != CALL 6218 || !MEM_P (XEXP (pattern, 0))) 6219 continue; 6220 6221 reg = XEXP (XEXP (pattern, 0), 0); 6222 } 6223 else 6224 { 6225 reg = sfunc_uses_reg (insn); 6226 if (! reg) 6227 continue; 6228 } 6229 6230 if (!REG_P (reg)) 6231 continue; 6232 6233 /* Try scanning backward to find where the register is set. */ 6234 link = NULL; 6235 for (scan = PREV_INSN (insn); 6236 scan && !LABEL_P (scan); 6237 scan = PREV_INSN (scan)) 6238 { 6239 if (! INSN_P (scan)) 6240 continue; 6241 6242 if (! reg_mentioned_p (reg, scan)) 6243 continue; 6244 6245 if (noncall_uses_reg (reg, scan, &set)) 6246 break; 6247 6248 if (set) 6249 { 6250 link = scan; 6251 break; 6252 } 6253 } 6254 6255 if (! link) 6256 continue; 6257 6258 /* The register is set at LINK. */ 6259 6260 /* We can only optimize the function call if the register is 6261 being set to a symbol. In theory, we could sometimes 6262 optimize calls to a constant location, but the assembler 6263 and linker do not support that at present. */ 6264 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF 6265 && GET_CODE (SET_SRC (set)) != LABEL_REF) 6266 continue; 6267 6268 /* Scan forward from LINK to the place where REG dies, and 6269 make sure that the only insns which use REG are 6270 themselves function calls. */ 6271 6272 /* ??? This doesn't work for call targets that were allocated 6273 by reload, since there may not be a REG_DEAD note for the 6274 register. */ 6275 6276 dies = NULL_RTX; 6277 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) 6278 { 6279 rtx scanset; 6280 6281 /* Don't try to trace forward past a CODE_LABEL if we haven't 6282 seen INSN yet. Ordinarily, we will only find the setting insn 6283 if it is in the same basic block. However, 6284 cross-jumping can insert code labels in between the load and 6285 the call, and can result in situations where a single call 6286 insn may have two targets depending on where we came from. */ 6287 6288 if (LABEL_P (scan) && ! foundinsn) 6289 break; 6290 6291 if (! INSN_P (scan)) 6292 continue; 6293 6294 /* Don't try to trace forward past a JUMP. To optimize 6295 safely, we would have to check that all the 6296 instructions at the jump destination did not use REG. */ 6297 6298 if (JUMP_P (scan)) 6299 break; 6300 6301 if (! reg_mentioned_p (reg, scan)) 6302 continue; 6303 6304 if (noncall_uses_reg (reg, scan, &scanset)) 6305 break; 6306 6307 if (scan == insn) 6308 foundinsn = 1; 6309 6310 if (scan != insn 6311 && (CALL_P (scan) || sfunc_uses_reg (scan))) 6312 { 6313 /* There is a function call to this register other 6314 than the one we are checking. If we optimize 6315 this call, we need to rescan again below. */ 6316 rescan = 1; 6317 } 6318 6319 /* ??? We shouldn't have to worry about SCANSET here. 6320 We should just be able to check for a REG_DEAD note 6321 on a function call. 
However, the REG_DEAD notes are 6322 apparently not dependable around libcalls; c-torture 6323 execute/920501-2 is a test case. If SCANSET is set, 6324 then this insn sets the register, so it must have 6325 died earlier. Unfortunately, this will only handle 6326 the cases in which the register is, in fact, set in a 6327 later insn. */ 6328 6329 /* ??? We shouldn't have to use FOUNDINSN here. 6330 This dates back to when we used LOG_LINKS to find 6331 the most recent insn which sets the register. */ 6332 6333 if (foundinsn 6334 && (scanset 6335 || find_reg_note (scan, REG_DEAD, reg))) 6336 { 6337 dies = scan; 6338 break; 6339 } 6340 } 6341 6342 if (! dies) 6343 { 6344 /* Either there was a branch, or some insn used REG 6345 other than as a function call address. */ 6346 continue; 6347 } 6348 6349 /* Create a code label, and put it in a REG_LABEL_OPERAND note 6350 on the insn which sets the register, and on each call insn 6351 which uses the register. In final_prescan_insn we look for 6352 the REG_LABEL_OPERAND notes, and output the appropriate label 6353 or pseudo-op. */ 6354 6355 label = gen_label_rtx (); 6356 add_reg_note (link, REG_LABEL_OPERAND, label); 6357 add_reg_note (insn, REG_LABEL_OPERAND, label); 6358 if (rescan) 6359 { 6360 scan = link; 6361 do 6362 { 6363 rtx reg2; 6364 6365 scan = NEXT_INSN (scan); 6366 if (scan != insn 6367 && ((CALL_P (scan) 6368 && reg_mentioned_p (reg, scan)) 6369 || ((reg2 = sfunc_uses_reg (scan)) 6370 && REGNO (reg2) == REGNO (reg)))) 6371 add_reg_note (scan, REG_LABEL_OPERAND, label); 6372 } 6373 while (scan != dies); 6374 } 6375 } 6376 } 6377 6378 if (TARGET_SH2) 6379 fixup_addr_diff_vecs (first); 6380 6381 if (optimize) 6382 { 6383 mdep_reorg_phase = SH_SHORTEN_BRANCHES0; 6384 shorten_branches (first); 6385 } 6386 6387 /* Scan the function looking for move instructions which have to be 6388 changed to pc-relative loads and insert the literal tables. */ 6389 label_ref_list_pool = create_alloc_pool ("label references list", 6390 sizeof (struct label_ref_list_d), 6391 30); 6392 mdep_reorg_phase = SH_FIXUP_PCLOAD; 6393 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) 6394 { 6395 if (mova_p (insn)) 6396 { 6397 /* ??? basic block reordering can move a switch table dispatch 6398 below the switch table. Check if that has happened. 6399 We only have the addresses available when optimizing; but then, 6400 this check shouldn't be needed when not optimizing. */ 6401 if (!untangle_mova (&num_mova, &mova, insn)) 6402 { 6403 insn = mova; 6404 num_mova = 0; 6405 } 6406 } 6407 else if (JUMP_TABLE_DATA_P (insn) 6408 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC 6409 && num_mova 6410 /* ??? loop invariant motion can also move a mova out of a 6411 loop. Since loop does this code motion anyway, maybe we 6412 should wrap UNSPEC_MOVA into a CONST, so that reload can 6413 move it back. */ 6414 && ((num_mova > 1 6415 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode) 6416 || (prev_nonnote_insn (insn) 6417 == XEXP (MOVA_LABELREF (mova), 0)))) 6418 { 6419 rtx_insn *scan; 6420 int total; 6421 6422 num_mova--; 6423 6424 /* Some code might have been inserted between the mova and 6425 its ADDR_DIFF_VEC. Check if the mova is still in range. */ 6426 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) 6427 total += get_attr_length (scan); 6428 6429 /* range of mova is 1020, add 4 because pc counts from address of 6430 second instruction after this one, subtract 2 in case pc is 2 6431 byte aligned. 
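             That is, 1020 + 4 - 2 = 1022, the limit tested below.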
             Possible alignment needed for the ADDR_DIFF_VEC
             cancels out with alignment effects of the mova itself.  */
          if (total > 1022)
            {
              /* Change the mova into a load, and restart scanning
                 there.  broken_move will then return true for mova.  */
              fixup_mova (mova);
              insn = mova;
            }
        }
      if (broken_move (insn)
          || (NONJUMP_INSN_P (insn)
              && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
        {
          rtx_insn *scan;
          /* Scan ahead looking for a barrier to stick the constant table
             behind.  */
          rtx_insn *barrier = find_barrier (num_mova, mova, insn);
          rtx_insn *last_float_move = NULL;
          rtx last_float = 0, *last_float_addr = NULL;
          int need_aligned_label = 0;

          if (num_mova && ! mova_p (mova))
            {
              /* find_barrier had to change the first mova into a
                 pcload; thus, we have to start with this new pcload.  */
              insn = mova;
              num_mova = 0;
            }
          /* Now find all the moves between the points and modify them.  */
          for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
            {
              if (LABEL_P (scan))
                last_float = 0;
              if (NONJUMP_INSN_P (scan)
                  && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
                need_aligned_label = 1;
              if (broken_move (scan))
                {
                  rtx *patp = &PATTERN (scan), pat = *patp;
                  rtx src, dst;
                  rtx lab;
                  rtx newsrc;
                  machine_mode mode;

                  if (GET_CODE (pat) == PARALLEL)
                    patp = &XVECEXP (pat, 0, 0), pat = *patp;
                  src = SET_SRC (pat);
                  dst = SET_DEST (pat);
                  mode = GET_MODE (dst);

                  if (mode == SImode && satisfies_constraint_I16 (src)
                      && REGNO (dst) != FPUL_REG)
                    {
                      int offset = 0;

                      mode = HImode;
                      while (GET_CODE (dst) == SUBREG)
                        {
                          offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
                                                         GET_MODE (SUBREG_REG (dst)),
                                                         SUBREG_BYTE (dst),
                                                         GET_MODE (dst));
                          dst = SUBREG_REG (dst);
                        }
                      dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
                    }
                  if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
                    {
                      /* This must be an insn that clobbers r0.  */
                      rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
                                                XVECLEN (PATTERN (scan), 0)
                                                - 1);
                      rtx clobber = *clobberp;

                      gcc_assert (GET_CODE (clobber) == CLOBBER
                                  && rtx_equal_p (XEXP (clobber, 0), r0_rtx));

                      if (last_float
                          && reg_set_between_p (r0_rtx, last_float_move, scan))
                        last_float = 0;
                      if (last_float
                          && TARGET_SHCOMPACT
                          && GET_MODE_SIZE (mode) != 4
                          && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
                        last_float = 0;
                      lab = add_constant (src, mode, last_float);
                      if (lab)
                        emit_insn_before (gen_mova (lab), scan);
                      else
                        {
                          /* There will be a REG_UNUSED note for r0 on
                             LAST_FLOAT_MOVE; we have to change it to REG_INC,
                             otherwise reorg's mark_target_live_regs will not
                             consider r0 to be used, and we could end up with a
                             delay slot insn in front of SCAN that clobbers r0.  */
                          rtx note
                            = find_regno_note (last_float_move, REG_UNUSED, 0);

                          /* If we are not optimizing, then there may not be
                             a note.  */
                          if (note)
                            PUT_REG_NOTE_KIND (note, REG_INC);

                          *last_float_addr = r0_inc_rtx;
                        }
                      last_float_move = scan;
                      last_float = src;
                      newsrc = gen_const_mem (mode,
                                              (((TARGET_SH4 && ! TARGET_FMOVD)
                                                || REGNO (dst) == FPUL_REG)
                                               ? r0_inc_rtx
                                               : r0_rtx));
                      last_float_addr = &XEXP (newsrc, 0);

                      /* Remove the clobber of r0.
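                         A clobber of a scratch register is substituted
                         below, so the insn keeps its shape but no longer
                         ties up r0.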
*/ 6547 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber), 6548 gen_rtx_SCRATCH (Pmode)); 6549 } 6550 /* This is a mova needing a label. Create it. */ 6551 else if (GET_CODE (src) == UNSPEC 6552 && XINT (src, 1) == UNSPEC_MOVA 6553 && GET_CODE (XVECEXP (src, 0, 0)) == CONST) 6554 { 6555 lab = add_constant (XVECEXP (src, 0, 0), mode, 0); 6556 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 6557 newsrc = gen_rtx_UNSPEC (SImode, 6558 gen_rtvec (1, newsrc), 6559 UNSPEC_MOVA); 6560 } 6561 else if (GET_CODE (src) == UNSPEC_VOLATILE 6562 && XINT (src, 1) == UNSPECV_SP_SWITCH_B) 6563 { 6564 newsrc = XVECEXP (src, 0, 0); 6565 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc); 6566 INSN_CODE (scan) = -1; 6567 continue; 6568 } 6569 else 6570 { 6571 lab = add_constant (src, mode, 0); 6572 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 6573 newsrc = gen_const_mem (mode, newsrc); 6574 } 6575 *patp = gen_rtx_SET (VOIDmode, dst, newsrc); 6576 INSN_CODE (scan) = -1; 6577 } 6578 } 6579 dump_table (need_aligned_label ? insn : 0, barrier); 6580 insn = barrier; 6581 } 6582 } 6583 free_alloc_pool (label_ref_list_pool); 6584 for (insn = first; insn; insn = NEXT_INSN (insn)) 6585 PUT_MODE (insn, VOIDmode); 6586 6587 mdep_reorg_phase = SH_SHORTEN_BRANCHES1; 6588 INSN_ADDRESSES_FREE (); 6589 split_branches (first); 6590 6591 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it 6592 also has an effect on the register that holds the address of the sfunc. 6593 Insert an extra dummy insn in front of each sfunc that pretends to 6594 use this register. */ 6595 if (flag_delayed_branch) 6596 { 6597 for (insn = first; insn; insn = NEXT_INSN (insn)) 6598 { 6599 rtx reg = sfunc_uses_reg (insn); 6600 6601 if (! reg) 6602 continue; 6603 emit_insn_before (gen_use_sfunc_addr (reg), insn); 6604 } 6605 } 6606 mdep_reorg_phase = SH_AFTER_MDEP_REORG; 6607} 6608 6609/* Return the UID of the insn that follows the specified label. */ 6610int 6611get_dest_uid (rtx label, int max_uid) 6612{ 6613 rtx_insn *dest = next_real_insn (label); 6614 int dest_uid; 6615 if (! dest) 6616 /* This can happen for an undefined label. */ 6617 return 0; 6618 dest_uid = INSN_UID (dest); 6619 /* If this is a newly created branch redirection blocking instruction, 6620 we cannot index the branch_uid or insn_addresses arrays with its 6621 uid. But then, we won't need to, because the actual destination is 6622 the following branch. */ 6623 while (dest_uid >= max_uid) 6624 { 6625 dest = NEXT_INSN (dest); 6626 dest_uid = INSN_UID (dest); 6627 } 6628 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN) 6629 return 0; 6630 return dest_uid; 6631} 6632 6633/* Split condbranches that are out of range. Also add clobbers for 6634 scratch registers that are needed in far jumps. 6635 We do this before delay slot scheduling, so that it can take our 6636 newly created instructions into account. It also allows us to 6637 find branches with common targets more easily. */ 6638static void 6639split_branches (rtx_insn *first) 6640{ 6641 rtx_insn *insn; 6642 struct far_branch **uid_branch, *far_branch_list = 0; 6643 int max_uid = get_max_uid (); 6644 int ok; 6645 6646 /* Find out which branches are out of range. */ 6647 shorten_branches (first); 6648 6649 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); 6650 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch); 6651 6652 for (insn = first; insn; insn = NEXT_INSN (insn)) 6653 if (! 
INSN_P (insn)) 6654 continue; 6655 else if (insn->deleted ()) 6656 { 6657 /* Shorten_branches would split this instruction again, 6658 so transform it into a note. */ 6659 SET_INSN_DELETED (insn); 6660 } 6661 else if (JUMP_P (insn)) 6662 { 6663 enum attr_type type = get_attr_type (insn); 6664 if (type == TYPE_CBRANCH) 6665 { 6666 rtx_insn *next, *beyond; 6667 6668 if (get_attr_length (insn) > 4) 6669 { 6670 rtx src = SET_SRC (PATTERN (insn)); 6671 rtx olabel = XEXP (XEXP (src, 1), 0); 6672 int addr = INSN_ADDRESSES (INSN_UID (insn)); 6673 rtx_insn *label = 0; 6674 int dest_uid = get_dest_uid (olabel, max_uid); 6675 struct far_branch *bp = uid_branch[dest_uid]; 6676 6677 /* redirect_jump needs a valid JUMP_LABEL, and it might delete 6678 the label if the LABEL_NUSES count drops to zero. There is 6679 always a jump_optimize pass that sets these values, but it 6680 proceeds to delete unreferenced code, and then if not 6681 optimizing, to un-delete the deleted instructions, thus 6682 leaving labels with too low uses counts. */ 6683 if (! optimize) 6684 { 6685 JUMP_LABEL (insn) = olabel; 6686 LABEL_NUSES (olabel)++; 6687 } 6688 if (! bp) 6689 { 6690 bp = (struct far_branch *) alloca (sizeof *bp); 6691 uid_branch[dest_uid] = bp; 6692 bp->prev = far_branch_list; 6693 far_branch_list = bp; 6694 bp->far_label = as_a <rtx_insn *> ( 6695 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 6696 0)); 6697 LABEL_NUSES (bp->far_label)++; 6698 } 6699 else 6700 { 6701 label = bp->near_label; 6702 if (! label && bp->address - addr >= CONDJUMP_MIN) 6703 { 6704 rtx_insn *block = bp->insert_place; 6705 6706 if (GET_CODE (PATTERN (block)) == RETURN) 6707 block = PREV_INSN (block); 6708 else 6709 block = gen_block_redirect (block, 6710 bp->address, 2); 6711 label = emit_label_after (gen_label_rtx (), 6712 PREV_INSN (block)); 6713 bp->near_label = label; 6714 } 6715 else if (label && ! NEXT_INSN (label)) 6716 { 6717 if (addr + 2 - bp->address <= CONDJUMP_MAX) 6718 bp->insert_place = insn; 6719 else 6720 gen_far_branch (bp); 6721 } 6722 } 6723 if (! label 6724 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)) 6725 { 6726 bp->near_label = label = gen_label_rtx (); 6727 bp->insert_place = insn; 6728 bp->address = addr; 6729 } 6730 ok = redirect_jump (insn, label, 0); 6731 gcc_assert (ok); 6732 } 6733 else 6734 { 6735 /* get_attr_length (insn) == 2 */ 6736 /* Check if we have a pattern where reorg wants to redirect 6737 the branch to a label from an unconditional branch that 6738 is too far away. */ 6739 /* We can't use JUMP_LABEL here because it might be undefined 6740 when not optimizing. */ 6741 /* A syntax error might cause beyond to be NULL_RTX. 
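                 The biased unsigned comparison below presumably tests
                 whether the unconditional jump's target lies outside the
                 window that a short conditional branch can reach from INSN.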
*/ 6742 beyond 6743 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 6744 0)); 6745 6746 if (beyond 6747 && (JUMP_P (beyond) 6748 || ((beyond = next_active_insn (beyond)) 6749 && JUMP_P (beyond))) 6750 && GET_CODE (PATTERN (beyond)) == SET 6751 && recog_memoized (beyond) == CODE_FOR_jump_compact 6752 && ((INSN_ADDRESSES 6753 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))) 6754 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 6755 > 252 + 258 + 2)) 6756 gen_block_redirect (beyond, 6757 INSN_ADDRESSES (INSN_UID (beyond)), 1); 6758 } 6759 6760 next = next_active_insn (insn); 6761 6762 if (next 6763 && (JUMP_P (next) 6764 || ((next = next_active_insn (next)) 6765 && JUMP_P (next))) 6766 && GET_CODE (PATTERN (next)) == SET 6767 && recog_memoized (next) == CODE_FOR_jump_compact 6768 && ((INSN_ADDRESSES 6769 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))) 6770 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) 6771 > 252 + 258 + 2)) 6772 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1); 6773 } 6774 else if (type == TYPE_JUMP || type == TYPE_RETURN) 6775 { 6776 int addr = INSN_ADDRESSES (INSN_UID (insn)); 6777 rtx_insn *far_label = 0; 6778 int dest_uid = 0; 6779 struct far_branch *bp; 6780 6781 if (type == TYPE_JUMP) 6782 { 6783 far_label = as_a <rtx_insn *> ( 6784 XEXP (SET_SRC (PATTERN (insn)), 0)); 6785 dest_uid = get_dest_uid (far_label, max_uid); 6786 if (! dest_uid) 6787 { 6788 /* Parse errors can lead to labels outside 6789 the insn stream. */ 6790 if (! NEXT_INSN (far_label)) 6791 continue; 6792 6793 if (! optimize) 6794 { 6795 JUMP_LABEL (insn) = far_label; 6796 LABEL_NUSES (far_label)++; 6797 } 6798 redirect_jump (insn, ret_rtx, 1); 6799 far_label = 0; 6800 } 6801 } 6802 bp = uid_branch[dest_uid]; 6803 if (! bp) 6804 { 6805 bp = (struct far_branch *) alloca (sizeof *bp); 6806 uid_branch[dest_uid] = bp; 6807 bp->prev = far_branch_list; 6808 far_branch_list = bp; 6809 bp->near_label = 0; 6810 bp->far_label = far_label; 6811 if (far_label) 6812 LABEL_NUSES (far_label)++; 6813 } 6814 else if (bp->near_label && ! NEXT_INSN (bp->near_label)) 6815 if (addr - bp->address <= CONDJUMP_MAX) 6816 emit_label_after (bp->near_label, PREV_INSN (insn)); 6817 else 6818 { 6819 gen_far_branch (bp); 6820 bp->near_label = 0; 6821 } 6822 else 6823 bp->near_label = 0; 6824 bp->address = addr; 6825 bp->insert_place = insn; 6826 if (! far_label) 6827 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); 6828 else 6829 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); 6830 } 6831 } 6832 /* Generate all pending far branches, 6833 and free our references to the far labels. */ 6834 while (far_branch_list) 6835 { 6836 if (far_branch_list->near_label 6837 && ! NEXT_INSN (far_branch_list->near_label)) 6838 gen_far_branch (far_branch_list); 6839 if (optimize 6840 && far_branch_list->far_label 6841 && ! --LABEL_NUSES (far_branch_list->far_label)) 6842 delete_insn (far_branch_list->far_label); 6843 far_branch_list = far_branch_list->prev; 6844 } 6845 6846 /* Instruction length information is no longer valid due to the new 6847 instructions that have been generated. */ 6848 init_insn_lengths (); 6849} 6850 6851/* Dump out instruction addresses, which is useful for debugging the 6852 constant pool table stuff. 6853 6854 If relaxing, output the label and pseudo-ops used to link together 6855 calls and the instruction which set the registers. 6856 6857 ??? 
The addresses printed by this routine for insns are nonsense for 6858 insns which are inside of a sequence where none of the inner insns have 6859 variable length. This is because the second pass of shorten_branches 6860 does not bother to update them. */ 6861void 6862final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED, 6863 int noperands ATTRIBUTE_UNUSED) 6864{ 6865 if (TARGET_DUMPISIZE) 6866 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); 6867 6868 if (TARGET_RELAX) 6869 { 6870 rtx note; 6871 6872 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX); 6873 if (note) 6874 { 6875 rtx pattern; 6876 6877 pattern = PATTERN (insn); 6878 if (GET_CODE (pattern) == PARALLEL) 6879 pattern = XVECEXP (pattern, 0, 0); 6880 switch (GET_CODE (pattern)) 6881 { 6882 case SET: 6883 if (GET_CODE (SET_SRC (pattern)) != CALL 6884 && get_attr_type (insn) != TYPE_SFUNC) 6885 { 6886 targetm.asm_out.internal_label 6887 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0))); 6888 break; 6889 } 6890 /* else FALLTHROUGH */ 6891 case CALL: 6892 asm_fprintf (asm_out_file, "\t.uses %LL%d\n", 6893 CODE_LABEL_NUMBER (XEXP (note, 0))); 6894 break; 6895 6896 default: 6897 gcc_unreachable (); 6898 } 6899 } 6900 } 6901} 6902 6903/* Dump out any constants accumulated in the final pass. These will 6904 only be labels. */ 6905const char * 6906output_jump_label_table (void) 6907{ 6908 int i; 6909 6910 if (pool_size) 6911 { 6912 fprintf (asm_out_file, "\t.align 2\n"); 6913 for (i = 0; i < pool_size; i++) 6914 { 6915 pool_node *p = &pool_vector[i]; 6916 6917 (*targetm.asm_out.internal_label) (asm_out_file, "L", 6918 CODE_LABEL_NUMBER (p->label)); 6919 output_asm_insn (".long %O0", &p->value); 6920 } 6921 pool_size = 0; 6922 } 6923 6924 return ""; 6925} 6926 6927/* A full frame looks like: 6928 6929 arg-5 6930 arg-4 6931 [ if current_function_anonymous_args 6932 arg-3 6933 arg-2 6934 arg-1 6935 arg-0 ] 6936 saved-fp 6937 saved-r10 6938 saved-r11 6939 saved-r12 6940 saved-pr 6941 local-n 6942 .. 6943 local-1 6944 local-0 <- fp points here. 6945 6946 Number of bytes pushed for anonymous args, used to pass information 6947 between expand_prologue and expand_epilogue. 6948 6949 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be 6950 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's 6951 for an epilogue and a negative value means that it's for a sibcall 6952 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of 6953 all the registers that are about to be restored, and hence dead. */ 6954static void 6955output_stack_adjust (int size, rtx reg, int epilogue_p, 6956 HARD_REG_SET *live_regs_mask, bool frame_p) 6957{ 6958 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn; 6959 if (size) 6960 { 6961 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; 6962 6963/* This test is bogus, as output_stack_adjust is used to re-align the 6964 stack. */ 6965#if 0 6966 gcc_assert (!(size % align)); 6967#endif 6968 6969 if (CONST_OK_FOR_ADD (size)) 6970 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size))); 6971 /* Try to do it with two partial adjustments; however, we must make 6972 sure that the stack is properly aligned at all times, in case 6973 an interrupt occurs between the two partial adjustments. 
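     For example, assuming the non-SHmedia add-immediate range of
     -128..127, a size of 200 with a 4-byte stack boundary is done as
     100 + 100; each partial adjustment keeps the stack aligned.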
  */
      else if (CONST_OK_FOR_ADD (size / 2 & -align)
               && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
        {
          emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
          emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
        }
      else
        {
          rtx const_reg;
          rtx insn;
          int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
          int i;

          /* If TEMP is invalid, we could temporarily save a general
             register to MACL.  However, there is currently no need
             to handle this case, so just die when we see it.  */
          if (epilogue_p < 0
              || current_function_interrupt
              || ! call_really_used_regs[temp] || fixed_regs[temp])
            temp = -1;
          if (temp < 0 && ! current_function_interrupt
              && (TARGET_SHMEDIA || epilogue_p >= 0))
            {
              HARD_REG_SET temps;
              COPY_HARD_REG_SET (temps, call_used_reg_set);
              AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
              if (epilogue_p > 0)
                {
                  int nreg = 0;
                  if (crtl->return_rtx)
                    {
                      machine_mode mode;
                      mode = GET_MODE (crtl->return_rtx);
                      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
                        nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
                    }
                  for (i = 0; i < nreg; i++)
                    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
                  if (crtl->calls_eh_return)
                    {
                      CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
                      for (i = 0; i <= 3; i++)
                        CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
                    }
                }
              if (TARGET_SHMEDIA && epilogue_p < 0)
                for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
                  CLEAR_HARD_REG_BIT (temps, i);
              if (epilogue_p <= 0)
                {
                  for (i = FIRST_PARM_REG;
                       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
                    CLEAR_HARD_REG_BIT (temps, i);
                  if (cfun->static_chain_decl != NULL)
                    CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
                }
              temp = scavenge_reg (&temps);
            }
          if (temp < 0 && live_regs_mask)
            {
              HARD_REG_SET temps;

              COPY_HARD_REG_SET (temps, *live_regs_mask);
              CLEAR_HARD_REG_BIT (temps, REGNO (reg));
              temp = scavenge_reg (&temps);
            }
          if (temp < 0)
            {
              rtx adj_reg, tmp_reg, mem;

              /* If we reached here, the most likely case is the (sibcall)
                 epilogue for non-SHmedia.  Use a special push/pop sequence
                 for such a case as a last resort.  This looks lengthy but
                 would not be a problem because it seems to be very
                 rare.  */

              gcc_assert (!TARGET_SHMEDIA && epilogue_p);


              /* ??? There is still the slight possibility that r4 or
                 r5 have been reserved as fixed registers or assigned
                 as global registers, and they change during an
                 interrupt.  There are possible ways to handle this:

                 - If we are adjusting the frame pointer (r14), we can do
                   with a single temp register and an ordinary push / pop
                   on the stack.
                 - Grab any call-used or call-saved registers (i.e. not
                   fixed or globals) for the temps we need.  We might
                   also grab r14 if we are adjusting the stack pointer.
                   If we can't find enough available registers, issue
                   a diagnostic and die - the user must have reserved
                   way too many registers.
                 But since all this is rather unlikely to happen and
                 would require extra testing, we just die if r4 / r5
                 are not available.
*/ 7070 gcc_assert (!fixed_regs[4] && !fixed_regs[5] 7071 && !global_regs[4] && !global_regs[5]); 7072 7073 adj_reg = gen_rtx_REG (GET_MODE (reg), 4); 7074 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5); 7075 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg); 7076 emit_insn (GEN_MOV (adj_reg, GEN_INT (size))); 7077 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg)); 7078 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 7079 emit_move_insn (mem, tmp_reg); 7080 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg)); 7081 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); 7082 emit_move_insn (mem, tmp_reg); 7083 emit_move_insn (reg, adj_reg); 7084 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 7085 emit_move_insn (adj_reg, mem); 7086 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); 7087 emit_move_insn (tmp_reg, mem); 7088 /* Tell flow the insns that pop r4/r5 aren't dead. */ 7089 emit_use (tmp_reg); 7090 emit_use (adj_reg); 7091 return; 7092 } 7093 const_reg = gen_rtx_REG (GET_MODE (reg), temp); 7094 7095 /* If SIZE is negative, subtract the positive value. 7096 This sometimes allows a constant pool entry to be shared 7097 between prologue and epilogue code. */ 7098 if (size < 0) 7099 { 7100 emit_insn (GEN_MOV (const_reg, GEN_INT (-size))); 7101 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg)); 7102 } 7103 else 7104 { 7105 emit_insn (GEN_MOV (const_reg, GEN_INT (size))); 7106 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg)); 7107 } 7108 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 7109 gen_rtx_SET (VOIDmode, reg, 7110 gen_rtx_PLUS (SImode, reg, 7111 GEN_INT (size)))); 7112 } 7113 } 7114} 7115 7116/* Emit the specified insn and mark it as frame related. 7117 FIXME: Rename this to emit_frame_insn. */ 7118static rtx_insn * 7119frame_insn (rtx x) 7120{ 7121 rtx_insn *insn = emit_insn (x); 7122 RTX_FRAME_RELATED_P (insn) = 1; 7123 return insn; 7124} 7125 7126/* Output RTL to push register RN onto the stack. */ 7127static rtx 7128push (int rn) 7129{ 7130 rtx x; 7131 if (rn == FPUL_REG) 7132 x = gen_push_fpul (); 7133 else if (rn == FPSCR_REG) 7134 x = gen_push_fpscr (); 7135 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD 7136 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) 7137 { 7138 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 7139 return NULL_RTX; 7140 x = gen_push_4 (gen_rtx_REG (DFmode, rn)); 7141 } 7142 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 7143 x = gen_push_e (gen_rtx_REG (SFmode, rn)); 7144 else 7145 x = gen_push (gen_rtx_REG (SImode, rn)); 7146 7147 x = frame_insn (x); 7148 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); 7149 return x; 7150} 7151 7152/* Output RTL to pop register RN from the stack. */ 7153static void 7154pop (int rn) 7155{ 7156 rtx x, sp_reg, reg; 7157 if (rn == FPUL_REG) 7158 x = gen_pop_fpul (); 7159 else if (rn == FPSCR_REG) 7160 x = gen_pop_fpscr (); 7161 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD 7162 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) 7163 { 7164 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) 7165 return; 7166 x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); 7167 } 7168 else if (TARGET_SH2E && FP_REGISTER_P (rn)) 7169 x = gen_pop_e (gen_rtx_REG (SFmode, rn)); 7170 else 7171 x = gen_pop (gen_rtx_REG (SImode, rn)); 7172 7173 x = emit_insn (x); 7174 7175 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 7176 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL 7177 ? 
                  SET_DEST (XVECEXP (PATTERN (x), 0, 0))
                  : SET_DEST (PATTERN (x)));
  add_reg_note (x, REG_CFA_RESTORE, reg);
  add_reg_note (x, REG_CFA_ADJUST_CFA,
                gen_rtx_SET (SImode, sp_reg,
                             plus_constant (SImode, sp_reg,
                                            GET_MODE_SIZE (GET_MODE (reg)))));
  add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
  RTX_FRAME_RELATED_P (x) = 1;
}

/* Generate code to push the regs specified in the mask.  */
static void
push_regs (HARD_REG_SET *mask, int interrupt_handler)
{
  int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
  int skip_fpscr = 0;

  /* Push PR last; this gives better latencies after the prologue, and
     provides candidates for the return delay slot when there are no general
     registers pushed.  */
  for (; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* If this is an interrupt handler, and the SZ bit varies,
         and we have to push any floating point register, we need
         to switch to the correct precision first.  */
      if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
          && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
        {
          HARD_REG_SET unsaved;

          push (FPSCR_REG);
          COMPL_HARD_REG_SET (unsaved, *mask);
          fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
          skip_fpscr = 1;
        }
      if (i != PR_REG
          && (i != FPSCR_REG || ! skip_fpscr)
          && TEST_HARD_REG_BIT (*mask, i))
        {
          /* If the ISR has the RESBANK attribute assigned, don't push any of
             the following registers - R0-R14, MACH, MACL and GBR.  */
          if (! (sh_cfun_resbank_handler_p ()
                 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
                     || i == MACH_REG
                     || i == MACL_REG
                     || i == GBR_REG)))
            push (i);
        }
    }

  /* Push banked registers last to improve delay slot opportunities.  */
  if (interrupt_handler)
    {
      bool use_movml = false;

      if (TARGET_SH2A)
        {
          unsigned int count = 0;

          for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
            if (TEST_HARD_REG_BIT (*mask, i))
              count++;
            else
              break;

          /* Use movml when all banked registers are pushed.  */
          if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
            use_movml = true;
        }

      if (sh_cfun_resbank_handler_p ())
        ; /* Do nothing.  */
      else if (use_movml)
        {
          rtx x, mem, reg, set;
          rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

          /* We must avoid scheduling the multiple-store insn together
             with other insns.  */
          emit_insn (gen_blockage ());
          x = gen_movml_push_banked (sp_reg);
          x = frame_insn (x);
          for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
            {
              mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
              reg = gen_rtx_REG (SImode, i);
              add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
            }

          set = gen_rtx_SET (SImode, sp_reg,
                             plus_constant (Pmode, sp_reg, - 32));
          add_reg_note (x, REG_CFA_ADJUST_CFA, set);
          emit_insn (gen_blockage ());
        }
      else
        for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
          if (TEST_HARD_REG_BIT (*mask, i))
            push (i);
    }

  /* Don't push the PR register for an ISR with the RESBANK attribute
     assigned.  */
  if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
    push (PR_REG);
}

/* Calculate how much extra space is needed to save all callee-saved
   target registers.
7285 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 7286static int 7287shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask) 7288{ 7289 int reg; 7290 int stack_space = 0; 7291 int interrupt_handler = sh_cfun_interrupt_handler_p (); 7292 7293 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--) 7294 if ((! call_really_used_regs[reg] || interrupt_handler) 7295 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg)) 7296 /* Leave space to save this target register on the stack, 7297 in case target register allocation wants to use it. */ 7298 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 7299 return stack_space; 7300} 7301 7302/* Decide whether we should reserve space for callee-save target registers, 7303 in case target register allocation wants to use them. REGS_SAVED is 7304 the space, in bytes, that is already required for register saves. 7305 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 7306static int 7307shmedia_reserve_space_for_target_registers_p (int regs_saved, 7308 HARD_REG_SET *live_regs_mask) 7309{ 7310 if (optimize_size) 7311 return 0; 7312 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved; 7313} 7314 7315/* Decide how much space to reserve for callee-save target registers 7316 in case target register allocation wants to use them. 7317 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */ 7318static int 7319shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask) 7320{ 7321 if (shmedia_space_reserved_for_target_registers) 7322 return shmedia_target_regs_stack_space (live_regs_mask); 7323 else 7324 return 0; 7325} 7326 7327/* Work out the registers which need to be saved, both as a mask and a 7328 count of saved words. Return the count. 7329 7330 If doing a pragma interrupt function, then push all regs used by the 7331 function, and if we call another function (we can tell by looking at PR), 7332 make sure that all the regs it clobbers are safe too. */ 7333static int 7334calc_live_regs (HARD_REG_SET *live_regs_mask) 7335{ 7336 unsigned int reg; 7337 int count; 7338 tree attrs; 7339 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler; 7340 bool nosave_low_regs; 7341 int pr_live, has_call; 7342 7343 attrs = DECL_ATTRIBUTES (current_function_decl); 7344 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p (); 7345 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE; 7346 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler; 7347 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE; 7348 7349 CLEAR_HARD_REG_SET (*live_regs_mask); 7350 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler 7351 && df_regs_ever_live_p (FPSCR_REG)) 7352 target_flags &= ~MASK_FPU_SINGLE; 7353 /* If we can save a lot of saves by switching to double mode, do that. */ 7354 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD 7355 && TARGET_FPU_SINGLE) 7356 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) 7357 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1) 7358 && (! call_really_used_regs[reg] 7359 || interrupt_handler) 7360 && ++count > 2) 7361 { 7362 target_flags &= ~MASK_FPU_SINGLE; 7363 break; 7364 } 7365 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already 7366 knows how to use it. That means the pseudo originally allocated for 7367 the initial value can become the PR_MEDIA_REG hard register, as seen for 7368 execute/20010122-1.c:test9. 
*/ 7369 if (TARGET_SHMEDIA) 7370 /* ??? this function is called from initial_elimination_offset, hence we 7371 can't use the result of sh_media_register_for_return here. */ 7372 pr_live = sh_pr_n_sets (); 7373 else 7374 { 7375 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG); 7376 pr_live = (pr_initial 7377 ? (!REG_P (pr_initial) 7378 || REGNO (pr_initial) != (PR_REG)) 7379 : df_regs_ever_live_p (PR_REG)); 7380 /* For Shcompact, if not optimizing, we end up with a memory reference 7381 using the return address pointer for __builtin_return_address even 7382 though there is no actual need to put the PR register on the stack. */ 7383 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM); 7384 } 7385 /* Force PR to be live if the prologue has to call the SHmedia 7386 argument decoder or register saver. */ 7387 if (TARGET_SHCOMPACT 7388 && ((crtl->args.info.call_cookie 7389 & ~ CALL_COOKIE_RET_TRAMP (1)) 7390 || crtl->saves_all_registers)) 7391 pr_live = 1; 7392 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live; 7393 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; ) 7394 { 7395 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG) 7396 ? pr_live 7397 : interrupt_handler 7398 ? (/* Need to save all the regs ever live. */ 7399 (df_regs_ever_live_p (reg) 7400 || (call_really_used_regs[reg] 7401 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG 7402 || reg == PIC_OFFSET_TABLE_REGNUM) 7403 && has_call) 7404 || (TARGET_SHMEDIA && has_call 7405 && REGISTER_NATURAL_MODE (reg) == SImode 7406 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg)))) 7407 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM 7408 && reg != RETURN_ADDRESS_POINTER_REGNUM 7409 && reg != T_REG && reg != GBR_REG 7410 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG 7411 /* Push fpscr only on targets which have FPU */ 7412 && (reg != FPSCR_REG || TARGET_FPU_ANY)) 7413 : (/* Only push those regs which are used and need to be saved. */ 7414 (TARGET_SHCOMPACT 7415 && flag_pic 7416 && crtl->args.info.call_cookie 7417 && reg == PIC_OFFSET_TABLE_REGNUM) 7418 || (df_regs_ever_live_p (reg) 7419 && ((!call_really_used_regs[reg] 7420 && !(reg != PIC_OFFSET_TABLE_REGNUM 7421 && fixed_regs[reg] && call_used_regs[reg])) 7422 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY))) 7423 || (crtl->calls_eh_return 7424 && (reg == EH_RETURN_DATA_REGNO (0) 7425 || reg == EH_RETURN_DATA_REGNO (1) 7426 || reg == EH_RETURN_DATA_REGNO (2) 7427 || reg == EH_RETURN_DATA_REGNO (3))) 7428 || ((reg == MACL_REG || reg == MACH_REG) 7429 && df_regs_ever_live_p (reg) 7430 && sh_cfun_attr_renesas_p ()) 7431 )) 7432 { 7433 SET_HARD_REG_BIT (*live_regs_mask, reg); 7434 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); 7435 7436 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD 7437 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT) 7438 { 7439 if (FP_REGISTER_P (reg)) 7440 { 7441 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1)) 7442 { 7443 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1)); 7444 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1)); 7445 } 7446 } 7447 else if (XD_REGISTER_P (reg)) 7448 { 7449 /* Must switch to double mode to access these registers. */ 7450 target_flags &= ~MASK_FPU_SINGLE; 7451 } 7452 } 7453 } 7454 if (nosave_low_regs && reg == R8_REG) 7455 break; 7456 } 7457 /* If we have a target register optimization pass after prologue / epilogue 7458 threading, we need to assume all target registers will be live even if 7459 they aren't now. 
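   That pass runs after prologue / epilogue threading, when the frame
   layout can no longer change, so the space has to be reserved here up
   front.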
*/
  if (flag_branch_target_load_optimize2
      && TARGET_SAVE_ALL_TARGET_REGS
      && shmedia_space_reserved_for_target_registers)
    for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
      if ((! call_really_used_regs[reg] || interrupt_handler)
	  && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
	{
	  SET_HARD_REG_BIT (*live_regs_mask, reg);
	  count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
	}
  /* If this is an interrupt handler, we don't have any call-clobbered
     registers we can conveniently use for target register save/restore.
     Make sure we save at least one general purpose register when we need
     to save target registers.  */
  if (interrupt_handler
      && hard_reg_set_intersect_p (*live_regs_mask,
				   reg_class_contents[TARGET_REGS])
      && ! hard_reg_set_intersect_p (*live_regs_mask,
				     reg_class_contents[GENERAL_REGS]))
    {
      SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
      count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
    }

  return count;
}

/* Code to generate prologue and epilogue sequences.  */

/* PUSHED is the number of bytes that are being pushed on the
   stack for register saves.  Return the frame size, padded
   appropriately so that the stack stays properly aligned.  */
static HOST_WIDE_INT
rounded_frame_size (int pushed)
{
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

  if (ACCUMULATE_OUTGOING_ARGS)
    size += crtl->outgoing_args_size;

  return ((size + pushed + align - 1) & -align) - pushed;
}

/* Choose a call-clobbered target-branch register that remains
   unchanged along the whole function.  We set it up as the return
   value in the prologue.  */
int
sh_media_register_for_return (void)
{
  int regno;
  int tr0_used;

  if (! crtl->is_leaf)
    return -1;
  if (lookup_attribute ("interrupt_handler",
			DECL_ATTRIBUTES (current_function_decl)))
    return -1;
  if (sh_cfun_interrupt_handler_p ())
    return -1;

  tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);

  for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
    if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
      return regno;

  return -1;
}

/* The maximum registers we need to save are:
   - 62 general purpose registers (r15 is stack pointer, r63 is zero)
   - 32 floating point registers (for each pair, we save none,
     one single precision value, or a double precision value).
   - 8 target registers
   (the schedule below additionally reserves two entries for the start
   and end delimiters).  */
#define MAX_SAVED_REGS (62+32+8)

typedef struct save_entry_s
{
  unsigned char reg;
  unsigned char mode;
  short offset;
} save_entry;

#define MAX_TEMPS 4

/* There will be a delimiter entry with VOIDmode both at the start and the
   end of a filled in schedule.  The end delimiter has the offset of the
   save with the smallest (i.e. most negative) offset.  */
typedef struct save_schedule_s
{
  save_entry entries[MAX_SAVED_REGS + 2];
  int temps[MAX_TEMPS+1];
} save_schedule;

/* Fill in SCHEDULE according to LIVE_REGS_MASK; the epilogue walks the
   filled-in entries in reverse order.  Returns the last entry written to
   (not counting the delimiter).  OFFSET_BASE is a number to be added to
   all offset entries.
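   For example, in a non-interrupt function where fr0, fr1 and fr4 are to
   be saved (assuming an 8-byte STACK_BOUNDARY and double-precision saves
   enabled), the aligned pass emits one DFmode entry for the fr0/fr1 pair
   at offset -8, the unaligned pass emits an SFmode entry for fr4 at
   offset -12, and the end delimiter records offset -12.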
*/
static save_entry *
sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
		    int offset_base)
{
  int align, i;
  save_entry *entry = schedule->entries;
  int tmpx = 0;
  int offset;

  if (! current_function_interrupt)
    for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
      if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
	  && ! FUNCTION_ARG_REGNO_P (i)
	  && i != FIRST_RET_REG
	  && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
	  && ! (crtl->calls_eh_return
		&& (i == EH_RETURN_STACKADJ_REGNO
		    || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
			&& (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
	schedule->temps[tmpx++] = i;
  entry->reg = -1;
  entry->mode = VOIDmode;
  entry->offset = offset_base;
  entry++;
  /* We loop twice: first, we save 8-byte aligned registers in the
     higher addresses, that are known to be aligned.  Then, we
     proceed to saving 32-bit registers that don't need 8-byte
     alignment.
     If this is an interrupt function, all registers that need saving
     need to be saved in full.  Moreover, we need to postpone saving
     target registers till we have saved some general purpose registers
     we can then use as scratch registers.  */
  offset = offset_base;
  for (align = 1; align >= 0; align--)
    {
      for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
	if (TEST_HARD_REG_BIT (*live_regs_mask, i))
	  {
	    machine_mode mode = REGISTER_NATURAL_MODE (i);
	    int reg = i;

	    if (current_function_interrupt)
	      {
		if (TARGET_REGISTER_P (i))
		  continue;
		if (GENERAL_REGISTER_P (i))
		  mode = DImode;
	      }
	    if (mode == SFmode && (i % 2) == 1
		&& ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
		&& (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
	      {
		mode = DFmode;
		i--;
		reg--;
	      }

	    /* If we're doing the aligned pass and this is not aligned,
	       or we're doing the unaligned pass and this is aligned,
	       skip it.  */
	    if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
		!= align)
	      continue;

	    if (current_function_interrupt
		&& GENERAL_REGISTER_P (i)
		&& tmpx < MAX_TEMPS)
	      schedule->temps[tmpx++] = i;

	    offset -= GET_MODE_SIZE (mode);
	    entry->reg = i;
	    entry->mode = mode;
	    entry->offset = offset;
	    entry++;
	  }
      if (align && current_function_interrupt)
	for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
	  if (TEST_HARD_REG_BIT (*live_regs_mask, i))
	    {
	      offset -= GET_MODE_SIZE (DImode);
	      entry->reg = i;
	      entry->mode = DImode;
	      entry->offset = offset;
	      entry++;
	    }
    }
  entry->reg = -1;
  entry->mode = VOIDmode;
  entry->offset = offset;
  schedule->temps[tmpx] = -1;
  return entry - 1;
}

/* Expand code for the function prologue.  */
void
sh_expand_prologue (void)
{
  HARD_REG_SET live_regs_mask;
  int d, i;
  int d_rounding = 0;
  int save_flags = target_flags;
  int pretend_args;
  int stack_usage;
  tree sp_switch_attr
    = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));

  current_function_interrupt = sh_cfun_interrupt_handler_p ();

  /* We have pretend args if we had an object sent partially in registers
     and partially on the stack, e.g. a large structure.
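     The register-passed part is spilled immediately below the
     stack-passed part, so that the callee can address the object as one
     contiguous block.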
*/ 7670 pretend_args = crtl->args.pretend_args_size; 7671 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl) 7672 && (NPARM_REGS(SImode) 7673 > crtl->args.info.arg_count[(int) SH_ARG_INT])) 7674 pretend_args = 0; 7675 7676 output_stack_adjust (-pretend_args 7677 - crtl->args.info.stack_regs * 8, 7678 stack_pointer_rtx, 0, NULL, true); 7679 stack_usage = pretend_args + crtl->args.info.stack_regs * 8; 7680 7681 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie) 7682 /* We're going to use the PIC register to load the address of the 7683 incoming-argument decoder and/or of the return trampoline from 7684 the GOT, so make sure the PIC register is preserved and 7685 initialized. */ 7686 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 7687 7688 if (TARGET_SHCOMPACT 7689 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) 7690 { 7691 int reg; 7692 7693 /* First, make all registers with incoming arguments that will 7694 be pushed onto the stack live, so that register renaming 7695 doesn't overwrite them. */ 7696 for (reg = 0; reg < NPARM_REGS (SImode); reg++) 7697 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie) 7698 >= NPARM_REGS (SImode) - reg) 7699 for (; reg < NPARM_REGS (SImode); reg++) 7700 emit_insn (gen_shcompact_preserve_incoming_args 7701 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); 7702 else if (CALL_COOKIE_INT_REG_GET 7703 (crtl->args.info.call_cookie, reg) == 1) 7704 emit_insn (gen_shcompact_preserve_incoming_args 7705 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg))); 7706 7707 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG), 7708 stack_pointer_rtx); 7709 emit_move_insn (gen_rtx_REG (SImode, R0_REG), 7710 GEN_INT (crtl->args.info.call_cookie)); 7711 emit_move_insn (gen_rtx_REG (SImode, MACH_REG), 7712 gen_rtx_REG (SImode, R0_REG)); 7713 } 7714 else if (TARGET_SHMEDIA) 7715 { 7716 int tr = sh_media_register_for_return (); 7717 7718 if (tr >= 0) 7719 emit_move_insn (gen_rtx_REG (DImode, tr), 7720 gen_rtx_REG (DImode, PR_MEDIA_REG)); 7721 } 7722 7723 /* Emit the code for SETUP_VARARGS. */ 7724 if (cfun->stdarg) 7725 { 7726 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) 7727 { 7728 /* Push arg regs as if they'd been provided by caller in stack. */ 7729 for (i = 0; i < NPARM_REGS(SImode); i++) 7730 { 7731 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; 7732 7733 if (i >= (NPARM_REGS(SImode) 7734 - crtl->args.info.arg_count[(int) SH_ARG_INT] 7735 )) 7736 break; 7737 push (rn); 7738 stack_usage += GET_MODE_SIZE (SImode); 7739 } 7740 } 7741 } 7742 7743 /* If we're supposed to switch stacks at function entry, do so now. */ 7744 if (sp_switch_attr) 7745 { 7746 rtx lab, newsrc; 7747 /* The argument specifies a variable holding the address of the 7748 stack the interrupt function should switch to/from at entry/exit. */ 7749 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr)); 7750 const char *s 7751 = ggc_strdup (TREE_STRING_POINTER (arg)); 7752 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s); 7753 7754 lab = add_constant (sp_switch, SImode, 0); 7755 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); 7756 7757 emit_insn (gen_sp_switch_1 (newsrc)); 7758 } 7759 7760 d = calc_live_regs (&live_regs_mask); 7761 /* ??? Maybe we could save some switching if we can move a mode switch 7762 that already happens to be at the function start into the prologue. */ 7763 if (target_flags != save_flags && ! 
current_function_interrupt) 7764 emit_insn (gen_toggle_sz ()); 7765 7766 if (TARGET_SH5) 7767 { 7768 int offset_base, offset; 7769 rtx r0 = NULL_RTX; 7770 int offset_in_r0 = -1; 7771 int sp_in_r0 = 0; 7772 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); 7773 int total_size, save_size; 7774 save_schedule schedule; 7775 save_entry *entry; 7776 int *tmp_pnt; 7777 7778 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG] 7779 && ! current_function_interrupt) 7780 r0 = gen_rtx_REG (Pmode, R0_REG); 7781 7782 /* D is the actual number of bytes that we need for saving registers, 7783 however, in initial_elimination_offset we have committed to using 7784 an additional TREGS_SPACE amount of bytes - in order to keep both 7785 addresses to arguments supplied by the caller and local variables 7786 valid, we must keep this gap. Place it between the incoming 7787 arguments and the actually saved registers in a bid to optimize 7788 locality of reference. */ 7789 total_size = d + tregs_space; 7790 total_size += rounded_frame_size (total_size); 7791 save_size = total_size - rounded_frame_size (d); 7792 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT)) 7793 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) 7794 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT)); 7795 7796 /* If adjusting the stack in a single step costs nothing extra, do so. 7797 I.e. either if a single addi is enough, or we need a movi anyway, 7798 and we don't exceed the maximum offset range (the test for the 7799 latter is conservative for simplicity). */ 7800 if (TARGET_SHMEDIA 7801 && (CONST_OK_FOR_I10 (-total_size) 7802 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding)) 7803 && total_size <= 2044))) 7804 d_rounding = total_size - save_size; 7805 7806 offset_base = d + d_rounding; 7807 7808 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx, 7809 0, NULL, true); 7810 stack_usage += save_size + d_rounding; 7811 7812 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base); 7813 tmp_pnt = schedule.temps; 7814 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++) 7815 { 7816 machine_mode mode = (machine_mode) entry->mode; 7817 unsigned int reg = entry->reg; 7818 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX; 7819 rtx orig_reg_rtx; 7820 7821 offset = entry->offset; 7822 7823 reg_rtx = gen_rtx_REG (mode, reg); 7824 7825 mem_rtx = gen_frame_mem (mode, 7826 gen_rtx_PLUS (Pmode, 7827 stack_pointer_rtx, 7828 GEN_INT (offset))); 7829 7830 if (!memory_address_p (mode, XEXP (mem_rtx, 0))) 7831 { 7832 gcc_assert (r0); 7833 mem_rtx = NULL_RTX; 7834 } 7835 7836 if (HAVE_PRE_DECREMENT 7837 && (offset_in_r0 - offset == GET_MODE_SIZE (mode) 7838 || mem_rtx == NULL_RTX 7839 || reg == PR_REG || SPECIAL_REGISTER_P (reg))) 7840 { 7841 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0)); 7842 7843 if (!memory_address_p (mode, XEXP (pre_dec, 0))) 7844 pre_dec = NULL_RTX; 7845 else 7846 { 7847 mem_rtx = NULL_RTX; 7848 offset += GET_MODE_SIZE (mode); 7849 } 7850 } 7851 7852 if (mem_rtx != NULL_RTX) 7853 goto addr_ok; 7854 7855 if (offset_in_r0 == -1) 7856 { 7857 emit_move_insn (r0, GEN_INT (offset)); 7858 offset_in_r0 = offset; 7859 } 7860 else if (offset != offset_in_r0) 7861 { 7862 emit_move_insn (r0, 7863 gen_rtx_PLUS 7864 (Pmode, r0, 7865 GEN_INT (offset - offset_in_r0))); 7866 offset_in_r0 += offset - offset_in_r0; 7867 } 7868 7869 if (pre_dec != NULL_RTX) 7870 { 7871 if (! 
sp_in_r0) 7872 { 7873 emit_move_insn (r0, 7874 gen_rtx_PLUS 7875 (Pmode, r0, stack_pointer_rtx)); 7876 sp_in_r0 = 1; 7877 } 7878 7879 offset -= GET_MODE_SIZE (mode); 7880 offset_in_r0 -= GET_MODE_SIZE (mode); 7881 7882 mem_rtx = pre_dec; 7883 } 7884 else if (sp_in_r0) 7885 mem_rtx = gen_frame_mem (mode, r0); 7886 else 7887 mem_rtx = gen_frame_mem (mode, 7888 gen_rtx_PLUS (Pmode, 7889 stack_pointer_rtx, 7890 r0)); 7891 7892 /* We must not use an r0-based address for target-branch 7893 registers or for special registers without pre-dec 7894 memory addresses, since we store their values in r0 7895 first. */ 7896 gcc_assert (!TARGET_REGISTER_P (reg) 7897 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) 7898 || mem_rtx == pre_dec)); 7899 7900 addr_ok: 7901 orig_reg_rtx = reg_rtx; 7902 if (TARGET_REGISTER_P (reg) 7903 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) 7904 && mem_rtx != pre_dec)) 7905 { 7906 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt); 7907 7908 emit_move_insn (tmp_reg, reg_rtx); 7909 7910 if (REGNO (tmp_reg) == R0_REG) 7911 { 7912 offset_in_r0 = -1; 7913 sp_in_r0 = 0; 7914 gcc_assert (!refers_to_regno_p (R0_REG, mem_rtx)); 7915 } 7916 7917 if (*++tmp_pnt <= 0) 7918 tmp_pnt = schedule.temps; 7919 7920 reg_rtx = tmp_reg; 7921 } 7922 { 7923 rtx insn; 7924 7925 /* Mark as interesting for dwarf cfi generator */ 7926 insn = emit_move_insn (mem_rtx, reg_rtx); 7927 RTX_FRAME_RELATED_P (insn) = 1; 7928 /* If we use an intermediate register for the save, we can't 7929 describe this exactly in cfi as a copy of the to-be-saved 7930 register into the temporary register and then the temporary 7931 register on the stack, because the temporary register can 7932 have a different natural size than the to-be-saved register. 7933 Thus, we gloss over the intermediate copy and pretend we do 7934 a direct save from the to-be-saved register. */ 7935 if (REGNO (reg_rtx) != reg) 7936 { 7937 rtx set; 7938 7939 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx); 7940 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); 7941 } 7942 7943 if (TARGET_SHCOMPACT && (offset_in_r0 != -1)) 7944 { 7945 rtx reg_rtx = gen_rtx_REG (mode, reg); 7946 rtx set; 7947 rtx mem_rtx = gen_frame_mem (mode, 7948 gen_rtx_PLUS (Pmode, 7949 stack_pointer_rtx, 7950 GEN_INT (offset))); 7951 7952 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx); 7953 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); 7954 } 7955 } 7956 } 7957 7958 gcc_assert (entry->offset == d_rounding); 7959 } 7960 else 7961 { 7962 push_regs (&live_regs_mask, current_function_interrupt); 7963 stack_usage += d; 7964 } 7965 7966 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) 7967 emit_insn (gen_GOTaddr2picreg (const0_rtx)); 7968 7969 if (SHMEDIA_REGS_STACK_ADJUST ()) 7970 { 7971 /* This must NOT go through the PLT, otherwise mach and macl 7972 may be clobbered. */ 7973 function_symbol (gen_rtx_REG (Pmode, R0_REG), 7974 (TARGET_FPU_ANY 7975 ? "__GCC_push_shmedia_regs" 7976 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT); 7977 emit_insn (gen_shmedia_save_restore_regs_compact 7978 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ()))); 7979 } 7980 7981 if (target_flags != save_flags && ! 
current_function_interrupt) 7982 emit_insn (gen_toggle_sz ()); 7983 7984 target_flags = save_flags; 7985 7986 output_stack_adjust (-rounded_frame_size (d) + d_rounding, 7987 stack_pointer_rtx, 0, NULL, true); 7988 stack_usage += rounded_frame_size (d) - d_rounding; 7989 7990 if (frame_pointer_needed) 7991 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx)); 7992 7993 if (TARGET_SHCOMPACT 7994 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1))) 7995 { 7996 /* This must NOT go through the PLT, otherwise mach and macl 7997 may be clobbered. */ 7998 function_symbol (gen_rtx_REG (Pmode, R0_REG), 7999 "__GCC_shcompact_incoming_args", SFUNC_GOT); 8000 emit_insn (gen_shcompact_incoming_args ()); 8001 } 8002 8003 /* If we are profiling, make sure no instructions are scheduled before 8004 the call to mcount. Similarly if some call instructions are swapped 8005 before frame related insns, it'll confuse the unwinder because 8006 currently SH has no unwind info for function epilogues. */ 8007 if (crtl->profile || flag_exceptions || flag_unwind_tables) 8008 emit_insn (gen_blockage ()); 8009 8010 if (flag_stack_usage_info) 8011 current_function_static_stack_size = stack_usage; 8012} 8013 8014/* Expand code for the function epilogue. */ 8015void 8016sh_expand_epilogue (bool sibcall_p) 8017{ 8018 HARD_REG_SET live_regs_mask; 8019 int d, i; 8020 int d_rounding = 0; 8021 8022 int save_flags = target_flags; 8023 int frame_size, save_size; 8024 int fpscr_deferred = 0; 8025 int e = sibcall_p ? -1 : 1; 8026 8027 d = calc_live_regs (&live_regs_mask); 8028 8029 save_size = d; 8030 frame_size = rounded_frame_size (d); 8031 8032 if (TARGET_SH5) 8033 { 8034 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask); 8035 int total_size; 8036 if (d % (STACK_BOUNDARY / BITS_PER_UNIT)) 8037 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT) 8038 - d % (STACK_BOUNDARY / BITS_PER_UNIT)); 8039 8040 total_size = d + tregs_space; 8041 total_size += rounded_frame_size (total_size); 8042 save_size = total_size - frame_size; 8043 8044 /* If adjusting the stack in a single step costs nothing extra, do so. 8045 I.e. either if a single addi is enough, or we need a movi anyway, 8046 and we don't exceed the maximum offset range (the test for the 8047 latter is conservative for simplicity). */ 8048 if (TARGET_SHMEDIA 8049 && ! frame_pointer_needed 8050 && (CONST_OK_FOR_I10 (total_size) 8051 || (! CONST_OK_FOR_I10 (save_size + d_rounding) 8052 && total_size <= 2044))) 8053 d_rounding = frame_size; 8054 8055 frame_size -= d_rounding; 8056 } 8057 8058 if (frame_pointer_needed) 8059 { 8060 /* We must avoid scheduling the epilogue with previous basic blocks. 8061 See PR/18032 and PR/40313. */ 8062 emit_insn (gen_blockage ()); 8063 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e, 8064 &live_regs_mask, true); 8065 8066 /* We must avoid moving the stack pointer adjustment past code 8067 which reads from the local frame, else an interrupt could 8068 occur after the SP adjustment and clobber data in the local 8069 frame. */ 8070 emit_insn (gen_blockage ()); 8071 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx)); 8072 } 8073 else if (frame_size) 8074 { 8075 /* We must avoid moving the stack pointer adjustment past code 8076 which reads from the local frame, else an interrupt could 8077 occur after the SP adjustment and clobber data in the local 8078 frame. 
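	 The blockage insn emitted below acts as a scheduling barrier that
	 keeps such frame reads ahead of the adjustment.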
*/ 8079 emit_insn (gen_blockage ()); 8080 output_stack_adjust (frame_size, stack_pointer_rtx, e, 8081 &live_regs_mask, true); 8082 } 8083 8084 if (SHMEDIA_REGS_STACK_ADJUST ()) 8085 { 8086 function_symbol (gen_rtx_REG (Pmode, R0_REG), 8087 (TARGET_FPU_ANY 8088 ? "__GCC_pop_shmedia_regs" 8089 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT); 8090 /* This must NOT go through the PLT, otherwise mach and macl 8091 may be clobbered. */ 8092 emit_insn (gen_shmedia_save_restore_regs_compact 8093 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ()))); 8094 } 8095 8096 /* Pop all the registers. */ 8097 8098 if (target_flags != save_flags && ! current_function_interrupt) 8099 emit_insn (gen_toggle_sz ()); 8100 if (TARGET_SH5) 8101 { 8102 int offset_base, offset; 8103 int offset_in_r0 = -1; 8104 int sp_in_r0 = 0; 8105 rtx r0 = gen_rtx_REG (Pmode, R0_REG); 8106 save_schedule schedule; 8107 save_entry *entry; 8108 int *tmp_pnt; 8109 8110 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding); 8111 offset_base = -entry[1].offset + d_rounding; 8112 tmp_pnt = schedule.temps; 8113 for (; entry->mode != VOIDmode; entry--) 8114 { 8115 machine_mode mode = (machine_mode) entry->mode; 8116 int reg = entry->reg; 8117 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX; 8118 8119 offset = offset_base + entry->offset; 8120 reg_rtx = gen_rtx_REG (mode, reg); 8121 8122 mem_rtx = gen_frame_mem (mode, 8123 gen_rtx_PLUS (Pmode, 8124 stack_pointer_rtx, 8125 GEN_INT (offset))); 8126 8127 if (!memory_address_p (mode, XEXP (mem_rtx, 0))) 8128 mem_rtx = NULL_RTX; 8129 8130 if (HAVE_POST_INCREMENT 8131 && (offset == offset_in_r0 8132 || (offset + GET_MODE_SIZE (mode) != d + d_rounding 8133 && mem_rtx == NULL_RTX) 8134 || reg == PR_REG || SPECIAL_REGISTER_P (reg))) 8135 { 8136 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0)); 8137 8138 if (!memory_address_p (mode, XEXP (post_inc, 0))) 8139 post_inc = NULL_RTX; 8140 else 8141 mem_rtx = NULL_RTX; 8142 } 8143 8144 if (mem_rtx != NULL_RTX) 8145 goto addr_ok; 8146 8147 if (offset_in_r0 == -1) 8148 { 8149 emit_move_insn (r0, GEN_INT (offset)); 8150 offset_in_r0 = offset; 8151 } 8152 else if (offset != offset_in_r0) 8153 { 8154 emit_move_insn (r0, 8155 gen_rtx_PLUS 8156 (Pmode, r0, 8157 GEN_INT (offset - offset_in_r0))); 8158 offset_in_r0 += offset - offset_in_r0; 8159 } 8160 8161 if (post_inc != NULL_RTX) 8162 { 8163 if (! sp_in_r0) 8164 { 8165 emit_move_insn (r0, 8166 gen_rtx_PLUS 8167 (Pmode, r0, stack_pointer_rtx)); 8168 sp_in_r0 = 1; 8169 } 8170 8171 mem_rtx = post_inc; 8172 8173 offset_in_r0 += GET_MODE_SIZE (mode); 8174 } 8175 else if (sp_in_r0) 8176 mem_rtx = gen_frame_mem (mode, r0); 8177 else 8178 mem_rtx = gen_frame_mem (mode, 8179 gen_rtx_PLUS (Pmode, 8180 stack_pointer_rtx, 8181 r0)); 8182 8183 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg)) 8184 || mem_rtx == post_inc); 8185 8186 addr_ok: 8187 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg)) 8188 && mem_rtx != post_inc) 8189 { 8190 emit_move_insn (r0, mem_rtx); 8191 mem_rtx = r0; 8192 } 8193 else if (TARGET_REGISTER_P (reg)) 8194 { 8195 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt); 8196 8197 /* Give the scheduler a bit of freedom by using up to 8198 MAX_TEMPS registers in a round-robin fashion. */ 8199 emit_move_insn (tmp_reg, mem_rtx); 8200 mem_rtx = tmp_reg; 8201 if (*++tmp_pnt < 0) 8202 tmp_pnt = schedule.temps; 8203 } 8204 8205 emit_move_insn (reg_rtx, mem_rtx); 8206 } 8207 8208 gcc_assert (entry->offset + offset_base == d + d_rounding); 8209 } 8210 else /* ! 
TARGET_SH5 */
    {
      int last_reg;

      save_size = 0;
      /* For an ISR with RESBANK attribute assigned, don't pop the PR
	 register.  */
      if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
	  && !sh_cfun_resbank_handler_p ())
	{
	  if (!frame_pointer_needed)
	    emit_insn (gen_blockage ());
	  pop (PR_REG);
	}

      /* Banked registers are popped first to avoid being scheduled in the
	 delay slot.  RTE switches banks before the delay slot
	 instruction.  */
      if (current_function_interrupt)
	{
	  bool use_movml = false;

	  if (TARGET_SH2A)
	    {
	      unsigned int count = 0;

	      for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
		if (TEST_HARD_REG_BIT (live_regs_mask, i))
		  count++;
		else
		  break;

	      /* Use movml when all banked registers are popped.  */
	      if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
		use_movml = true;
	    }

	  if (sh_cfun_resbank_handler_p ())
	    ; /* Do nothing.  */
	  else if (use_movml)
	    {
	      rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

	      /* We must avoid scheduling the multiple-load insn together
		 with other insns.  */
	      emit_insn (gen_blockage ());
	      emit_insn (gen_movml_pop_banked (sp_reg));
	      emit_insn (gen_blockage ());
	    }
	  else
	    for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
		pop (i);

	  last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
	}
      else
	last_reg = FIRST_PSEUDO_REGISTER;

      for (i = 0; i < last_reg; i++)
	{
	  int j = (FIRST_PSEUDO_REGISTER - 1) - i;

	  if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
	      && hard_reg_set_intersect_p (live_regs_mask,
					   reg_class_contents[DF_REGS]))
	    fpscr_deferred = 1;
	  /* For an ISR with RESBANK attribute assigned, don't pop the
	     following registers: R0-R14, MACH, MACL and GBR.  */
	  else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
		   && ! (sh_cfun_resbank_handler_p ()
			 && ((j >= FIRST_GENERAL_REG
			      && j < LAST_GENERAL_REG)
			     || j == MACH_REG
			     || j == MACL_REG
			     || j == GBR_REG)))
	    pop (j);

	  if (j == FIRST_FP_REG && fpscr_deferred)
	    pop (FPSCR_REG);
	}
    }
  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());
  target_flags = save_flags;

  output_stack_adjust (crtl->args.pretend_args_size
		       + save_size + d_rounding
		       + crtl->args.info.stack_regs * 8,
		       stack_pointer_rtx, e, NULL, true);

  if (crtl->calls_eh_return)
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			 EH_RETURN_STACKADJ_RTX));

  /* Switch back to the normal stack if necessary.  */
  if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
    emit_insn (gen_sp_switch_2 ());

  /* Tell flow the insn that pops PR isn't dead.  */
  /* PR_REG will never be live in SHmedia mode, and we don't need to
     USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
     by the return pattern.  */
  if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
    emit_use (gen_rtx_REG (SImode, PR_REG));
}

/* Emit code to change the current function's return address to RA.
   TMP is available as a scratch register, if needed.  */
void
sh_set_return_address (rtx ra, rtx tmp)
{
  HARD_REG_SET live_regs_mask;
  int d;
  int pr_reg = TARGET_SHMEDIA ?
PR_MEDIA_REG : PR_REG;
  int pr_offset;

  d = calc_live_regs (&live_regs_mask);

  /* If pr_reg isn't live, we can set it (or the register given in
     sh_media_register_for_return) directly.  */
  if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
    {
      rtx rr;

      if (TARGET_SHMEDIA)
	{
	  int rr_regno = sh_media_register_for_return ();

	  if (rr_regno < 0)
	    rr_regno = pr_reg;

	  rr = gen_rtx_REG (DImode, rr_regno);
	}
      else
	rr = gen_rtx_REG (SImode, pr_reg);

      emit_insn (GEN_MOV (rr, ra));
      /* Tell flow the register for return isn't dead.  */
      emit_use (rr);
      return;
    }

  if (TARGET_SH5)
    {
      int offset;
      save_schedule schedule;
      save_entry *entry;

      entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
      offset = entry[1].offset;
      for (; entry->mode != VOIDmode; entry--)
	if (entry->reg == pr_reg)
	  goto found;

      /* We can't find the PR register.  */
      gcc_unreachable ();

    found:
      offset = entry->offset - offset;
      pr_offset = (rounded_frame_size (d) + offset
		   + SHMEDIA_REGS_STACK_ADJUST ());
    }
  else
    pr_offset = rounded_frame_size (d);

  emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));

  if (frame_pointer_needed)
    emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
  else
    emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));

  tmp = gen_frame_mem (Pmode, tmp);
  emit_insn (GEN_MOV (tmp, ra));
  /* Tell flow this store isn't dead.  */
  emit_use (tmp);
}

/* Clear variables at function end.  */
static void
sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			     HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
}

static rtx
sh_builtin_saveregs (void)
{
  /* First unnamed integer register.  */
  int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
  /* Number of integer registers we need to save.  */
  int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
  /* First unnamed SFmode float reg.  */
  int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
  /* Number of SFmode float regs to save.  */
  int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
  rtx regbuf, fpregs;
  int bufsize, regno;
  alias_set_type alias_set;

  if (TARGET_SH5)
    {
      if (n_intregs)
	{
	  int pushregs = n_intregs;

	  while (pushregs < NPARM_REGS (SImode) - 1
		 && (CALL_COOKIE_INT_REG_GET
			(crtl->args.info.call_cookie,
			 NPARM_REGS (SImode) - pushregs)
		     == 1))
	    {
	      crtl->args.info.call_cookie
		&= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
					  - pushregs, 1);
	      pushregs++;
	    }

	  if (pushregs == NPARM_REGS (SImode))
	    crtl->args.info.call_cookie
	      |= (CALL_COOKIE_INT_REG (0, 1)
		  | CALL_COOKIE_STACKSEQ (pushregs - 1));
	  else
	    crtl->args.info.call_cookie
	      |= CALL_COOKIE_STACKSEQ (pushregs);

	  crtl->args.pretend_args_size += 8 * n_intregs;
	}
      if (TARGET_SHCOMPACT)
	return const0_rtx;
    }

  if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
    {
      error ("__builtin_saveregs not supported by this subtarget");
      return const0_rtx;
    }

  if (TARGET_SHMEDIA)
    n_floatregs = 0;

  /* Allocate block of memory for the regs.  */
  /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8453 Or can assign_stack_local accept a 0 SIZE argument? */ 8454 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); 8455 8456 if (TARGET_SHMEDIA) 8457 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM)); 8458 else if (n_floatregs & 1) 8459 { 8460 rtx addr; 8461 8462 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 8463 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0)); 8464 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD))); 8465 regbuf = change_address (regbuf, BLKmode, addr); 8466 } 8467 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs) 8468 { 8469 rtx addr, mask; 8470 8471 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); 8472 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode, 8473 XEXP (regbuf, 0), 4)); 8474 mask = copy_to_mode_reg (Pmode, GEN_INT (-8)); 8475 emit_insn (gen_andsi3 (addr, addr, mask)); 8476 regbuf = change_address (regbuf, BLKmode, addr); 8477 } 8478 else 8479 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0); 8480 alias_set = get_varargs_alias_set (); 8481 set_mem_alias_set (regbuf, alias_set); 8482 8483 /* Save int args. 8484 This is optimized to only save the regs that are necessary. Explicitly 8485 named args need not be saved. */ 8486 if (n_intregs > 0) 8487 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, 8488 adjust_address (regbuf, BLKmode, 8489 n_floatregs * UNITS_PER_WORD), 8490 n_intregs); 8491 8492 if (TARGET_SHMEDIA) 8493 /* Return the address of the regbuf. */ 8494 return XEXP (regbuf, 0); 8495 8496 /* Save float args. 8497 This is optimized to only save the regs that are necessary. Explicitly 8498 named args need not be saved. 8499 We explicitly build a pointer to the buffer because it halves the insn 8500 count when not optimizing (otherwise the pointer is built for each reg 8501 saved). 8502 We emit the moves in reverse order so that we can use predecrement. */ 8503 8504 fpregs = copy_to_mode_reg (Pmode, 8505 plus_constant (Pmode, XEXP (regbuf, 0), 8506 n_floatregs * UNITS_PER_WORD)); 8507 if (TARGET_SH4 || TARGET_SH2A_DOUBLE) 8508 { 8509 rtx mem; 8510 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) 8511 { 8512 emit_insn (gen_addsi3 (fpregs, fpregs, 8513 GEN_INT (-2 * UNITS_PER_WORD))); 8514 mem = change_address (regbuf, DFmode, fpregs); 8515 emit_move_insn (mem, 8516 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno)); 8517 } 8518 regno = first_floatreg; 8519 if (regno & 1) 8520 { 8521 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 8522 mem = change_address (regbuf, SFmode, fpregs); 8523 emit_move_insn (mem, 8524 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) 8525 + regno - SH_REG_MSW_OFFSET)); 8526 } 8527 } 8528 else 8529 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) 8530 { 8531 rtx mem; 8532 8533 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); 8534 mem = change_address (regbuf, SFmode, fpregs); 8535 emit_move_insn (mem, 8536 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); 8537 } 8538 8539 /* Return the address of the regbuf. */ 8540 return XEXP (regbuf, 0); 8541} 8542 8543/* Define the `__builtin_va_list' type for the ABI. */ 8544static tree 8545sh_build_builtin_va_list (void) 8546{ 8547 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 8548 tree record, type_decl; 8549 8550 if (TARGET_SH5 || (! TARGET_SH2E && ! 
TARGET_SH4) 8551 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 8552 return ptr_type_node; 8553 8554 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 8555 type_decl = build_decl (BUILTINS_LOCATION, 8556 TYPE_DECL, get_identifier ("__va_list_tag"), record); 8557 8558 f_next_o = build_decl (BUILTINS_LOCATION, 8559 FIELD_DECL, get_identifier ("__va_next_o"), 8560 ptr_type_node); 8561 f_next_o_limit = build_decl (BUILTINS_LOCATION, 8562 FIELD_DECL, 8563 get_identifier ("__va_next_o_limit"), 8564 ptr_type_node); 8565 f_next_fp = build_decl (BUILTINS_LOCATION, 8566 FIELD_DECL, get_identifier ("__va_next_fp"), 8567 ptr_type_node); 8568 f_next_fp_limit = build_decl (BUILTINS_LOCATION, 8569 FIELD_DECL, 8570 get_identifier ("__va_next_fp_limit"), 8571 ptr_type_node); 8572 f_next_stack = build_decl (BUILTINS_LOCATION, 8573 FIELD_DECL, get_identifier ("__va_next_stack"), 8574 ptr_type_node); 8575 8576 DECL_FIELD_CONTEXT (f_next_o) = record; 8577 DECL_FIELD_CONTEXT (f_next_o_limit) = record; 8578 DECL_FIELD_CONTEXT (f_next_fp) = record; 8579 DECL_FIELD_CONTEXT (f_next_fp_limit) = record; 8580 DECL_FIELD_CONTEXT (f_next_stack) = record; 8581 8582 TYPE_STUB_DECL (record) = type_decl; 8583 TYPE_NAME (record) = type_decl; 8584 TYPE_FIELDS (record) = f_next_o; 8585 DECL_CHAIN (f_next_o) = f_next_o_limit; 8586 DECL_CHAIN (f_next_o_limit) = f_next_fp; 8587 DECL_CHAIN (f_next_fp) = f_next_fp_limit; 8588 DECL_CHAIN (f_next_fp_limit) = f_next_stack; 8589 8590 layout_type (record); 8591 8592 return record; 8593} 8594 8595/* Implement `va_start' for varargs and stdarg. */ 8596static void 8597sh_va_start (tree valist, rtx nextarg) 8598{ 8599 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 8600 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 8601 tree t, u; 8602 int nfp, nint; 8603 8604 if (TARGET_SH5) 8605 { 8606 expand_builtin_saveregs (); 8607 std_expand_builtin_va_start (valist, nextarg); 8608 return; 8609 } 8610 8611 if ((! TARGET_SH2E && ! TARGET_SH4) 8612 || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) 8613 { 8614 std_expand_builtin_va_start (valist, nextarg); 8615 return; 8616 } 8617 8618 f_next_o = TYPE_FIELDS (va_list_type_node); 8619 f_next_o_limit = DECL_CHAIN (f_next_o); 8620 f_next_fp = DECL_CHAIN (f_next_o_limit); 8621 f_next_fp_limit = DECL_CHAIN (f_next_fp); 8622 f_next_stack = DECL_CHAIN (f_next_fp_limit); 8623 8624 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 8625 NULL_TREE); 8626 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 8627 valist, f_next_o_limit, NULL_TREE); 8628 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp, 8629 NULL_TREE); 8630 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 8631 valist, f_next_fp_limit, NULL_TREE); 8632 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 8633 valist, f_next_stack, NULL_TREE); 8634 8635 /* Call __builtin_saveregs. 
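     It returns the address of the register save area; the __va_next_fp
     and __va_next_o pointers and their limits are all derived from that
     address below.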
*/ 8636 u = make_tree (sizetype, expand_builtin_saveregs ()); 8637 u = fold_convert (ptr_type_node, u); 8638 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u); 8639 TREE_SIDE_EFFECTS (t) = 1; 8640 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 8641 8642 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT]; 8643 if (nfp < 8) 8644 nfp = 8 - nfp; 8645 else 8646 nfp = 0; 8647 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp); 8648 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u); 8649 TREE_SIDE_EFFECTS (t) = 1; 8650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 8651 8652 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u); 8653 TREE_SIDE_EFFECTS (t) = 1; 8654 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 8655 8656 nint = crtl->args.info.arg_count[SH_ARG_INT]; 8657 if (nint < 4) 8658 nint = 4 - nint; 8659 else 8660 nint = 0; 8661 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint); 8662 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u); 8663 TREE_SIDE_EFFECTS (t) = 1; 8664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 8665 8666 u = make_tree (ptr_type_node, nextarg); 8667 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u); 8668 TREE_SIDE_EFFECTS (t) = 1; 8669 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 8670} 8671 8672/* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized 8673 member, return it. */ 8674static tree 8675find_sole_member (tree type) 8676{ 8677 tree field, member = NULL_TREE; 8678 8679 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 8680 { 8681 if (TREE_CODE (field) != FIELD_DECL) 8682 continue; 8683 if (!DECL_SIZE (field)) 8684 return NULL_TREE; 8685 if (integer_zerop (DECL_SIZE (field))) 8686 continue; 8687 if (member) 8688 return NULL_TREE; 8689 member = field; 8690 } 8691 return member; 8692} 8693 8694/* Implement `va_arg'. */ 8695static tree 8696sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 8697 gimple_seq *post_p ATTRIBUTE_UNUSED) 8698{ 8699 HOST_WIDE_INT size, rsize; 8700 tree tmp, pptr_type_node; 8701 tree addr, lab_over = NULL, result = NULL; 8702 bool pass_by_ref; 8703 tree eff_type; 8704 8705 if (!VOID_TYPE_P (type)) 8706 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type); 8707 else 8708 pass_by_ref = false; 8709 8710 if (pass_by_ref) 8711 type = build_pointer_type (type); 8712 8713 size = int_size_in_bytes (type); 8714 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 8715 pptr_type_node = build_pointer_type (ptr_type_node); 8716 8717 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) 8718 && ! 
(TARGET_HITACHI || sh_cfun_attr_renesas_p ())) 8719 { 8720 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; 8721 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; 8722 int pass_as_float; 8723 tree lab_false; 8724 tree member; 8725 8726 f_next_o = TYPE_FIELDS (va_list_type_node); 8727 f_next_o_limit = DECL_CHAIN (f_next_o); 8728 f_next_fp = DECL_CHAIN (f_next_o_limit); 8729 f_next_fp_limit = DECL_CHAIN (f_next_fp); 8730 f_next_stack = DECL_CHAIN (f_next_fp_limit); 8731 8732 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, 8733 NULL_TREE); 8734 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), 8735 valist, f_next_o_limit, NULL_TREE); 8736 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), 8737 valist, f_next_fp, NULL_TREE); 8738 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), 8739 valist, f_next_fp_limit, NULL_TREE); 8740 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), 8741 valist, f_next_stack, NULL_TREE); 8742 8743 /* Structures with a single member with a distinct mode are passed 8744 like their member. This is relevant if the latter has a REAL_TYPE 8745 or COMPLEX_TYPE type. */ 8746 eff_type = type; 8747 while (TREE_CODE (eff_type) == RECORD_TYPE 8748 && (member = find_sole_member (eff_type)) 8749 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE 8750 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE 8751 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE)) 8752 { 8753 tree field_type = TREE_TYPE (member); 8754 8755 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type)) 8756 eff_type = field_type; 8757 else 8758 { 8759 gcc_assert ((TYPE_ALIGN (eff_type) 8760 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type))) 8761 || (TYPE_ALIGN (eff_type) 8762 > GET_MODE_BITSIZE (TYPE_MODE (field_type)))); 8763 break; 8764 } 8765 } 8766 8767 if (TARGET_SH4 || TARGET_SH2A_DOUBLE) 8768 { 8769 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8) 8770 || (TREE_CODE (eff_type) == COMPLEX_TYPE 8771 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE 8772 && size <= 16)); 8773 } 8774 else 8775 { 8776 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4); 8777 } 8778 8779 addr = create_tmp_var (pptr_type_node); 8780 lab_false = create_artificial_label (UNKNOWN_LOCATION); 8781 lab_over = create_artificial_label (UNKNOWN_LOCATION); 8782 8783 valist = build_simple_mem_ref (addr); 8784 8785 if (pass_as_float) 8786 { 8787 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp)); 8788 tree cmp; 8789 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE; 8790 8791 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp)); 8792 gimplify_assign (unshare_expr (addr), tmp, pre_p); 8793 8794 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p); 8795 tmp = next_fp_limit; 8796 if (size > 4 && !is_double) 8797 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size); 8798 tmp = build2 (GE_EXPR, boolean_type_node, 8799 unshare_expr (next_fp_tmp), unshare_expr (tmp)); 8800 cmp = build3 (COND_EXPR, void_type_node, tmp, 8801 build1 (GOTO_EXPR, void_type_node, 8802 unshare_expr (lab_false)), NULL_TREE); 8803 if (!is_double) 8804 gimplify_and_add (cmp, pre_p); 8805 8806 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD 8807 || (is_double || size == 16)) 8808 { 8809 tmp = fold_convert (sizetype, next_fp_tmp); 8810 tmp = build2 (BIT_AND_EXPR, sizetype, tmp, 8811 size_int (UNITS_PER_WORD)); 8812 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp); 8813 gimplify_assign 
(unshare_expr (next_fp_tmp), tmp, pre_p);
	    }
	  if (is_double)
	    gimplify_and_add (cmp, pre_p);

#ifdef FUNCTION_ARG_SCmode_WART
	  if (TYPE_MODE (eff_type) == SCmode
	      && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
	    {
	      tree subtype = TREE_TYPE (eff_type);
	      tree real, imag;

	      imag
		= std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
	      imag = get_initialized_tmp_var (imag, pre_p, NULL);

	      real
		= std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
	      real = get_initialized_tmp_var (real, pre_p, NULL);

	      result = build2 (COMPLEX_EXPR, eff_type, real, imag);
	      if (type != eff_type)
		result = build1 (VIEW_CONVERT_EXPR, type, result);
	      result = get_initialized_tmp_var (result, pre_p, NULL);
	    }
#endif /* FUNCTION_ARG_SCmode_WART */

	  tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
	  gimplify_and_add (tmp, pre_p);

	  tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
	  gimplify_and_add (tmp, pre_p);

	  tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
	  gimplify_assign (unshare_expr (addr), tmp, pre_p);
	  gimplify_assign (unshare_expr (next_fp_tmp),
			   unshare_expr (valist), pre_p);

	  gimplify_assign (unshare_expr (valist),
			   unshare_expr (next_fp_tmp), post_p);
	  valist = next_fp_tmp;
	}
      else
	{
	  tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
	  tmp = build2 (GT_EXPR, boolean_type_node, tmp,
			unshare_expr (next_o_limit));
	  tmp = build3 (COND_EXPR, void_type_node, tmp,
			build1 (GOTO_EXPR, void_type_node,
				unshare_expr (lab_false)),
			NULL_TREE);
	  gimplify_and_add (tmp, pre_p);

	  tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
	  gimplify_assign (unshare_expr (addr), tmp, pre_p);

	  tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
	  gimplify_and_add (tmp, pre_p);

	  tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
	  gimplify_and_add (tmp, pre_p);

	  if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
	    gimplify_assign (unshare_expr (next_o),
			     unshare_expr (next_o_limit), pre_p);

	  tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
	  gimplify_assign (unshare_expr (addr), tmp, pre_p);
	}

      if (!result)
	{
	  tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
	  gimplify_and_add (tmp, pre_p);
	}
    }

  /* ??? In va-sh.h, there had been code to make values larger than
     size 8 indirect.  This does not match the FUNCTION_ARG macros.  */

  tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
  if (result)
    {
      gimplify_assign (result, tmp, pre_p);
      result = build1 (NOP_EXPR, TREE_TYPE (result), result);
      tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
      gimplify_and_add (tmp, pre_p);
    }
  else
    result = tmp;

  if (pass_by_ref)
    result = build_va_arg_indirect_ref (result);

  return result;
}

/* 64-bit floating point memory transfers are done as paired single
   precision loads or stores, so the DWARF information needs fixing on
   little endian targets (unless PR=SZ=1 in FPSCR).
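   sh_dwarf_register_span below therefore describes a DFmode hard register
   as a PARALLEL of its two SFmode halves.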
*/ 8913rtx 8914sh_dwarf_register_span (rtx reg) 8915{ 8916 unsigned regno = REGNO (reg); 8917 8918 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode) 8919 return NULL_RTX; 8920 8921 return 8922 gen_rtx_PARALLEL (VOIDmode, 8923 gen_rtvec (2, 8924 gen_rtx_REG (SFmode, regno + 1), 8925 gen_rtx_REG (SFmode, regno))); 8926} 8927 8928static machine_mode 8929sh_promote_function_mode (const_tree type, machine_mode mode, 8930 int *punsignedp, const_tree funtype, 8931 int for_return) 8932{ 8933 if (sh_promote_prototypes (funtype)) 8934 return promote_mode (type, mode, punsignedp); 8935 else 8936 return default_promote_function_mode (type, mode, punsignedp, funtype, 8937 for_return); 8938} 8939 8940static bool 8941sh_promote_prototypes (const_tree type) 8942{ 8943 if (TARGET_HITACHI) 8944 return false; 8945 if (! type) 8946 return true; 8947 return ! sh_attr_renesas_p (type); 8948} 8949 8950/* Whether an argument must be passed by reference. On SHcompact, we 8951 pretend arguments wider than 32-bits that would have been passed in 8952 registers are passed by reference, so that an SHmedia trampoline 8953 loads them into the full 64-bits registers. */ 8954static int 8955shcompact_byref (const CUMULATIVE_ARGS *cum, machine_mode mode, 8956 const_tree type, bool named) 8957{ 8958 unsigned HOST_WIDE_INT size; 8959 8960 if (type) 8961 size = int_size_in_bytes (type); 8962 else 8963 size = GET_MODE_SIZE (mode); 8964 8965 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode) 8966 && (!named 8967 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT 8968 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT 8969 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode))) 8970 && size > 4 8971 && !SHCOMPACT_FORCE_ON_STACK (mode, type) 8972 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named)) 8973 return size; 8974 else 8975 return 0; 8976} 8977 8978static bool 8979sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, 8980 const_tree type, bool named) 8981{ 8982 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 8983 8984 if (targetm.calls.must_pass_in_stack (mode, type)) 8985 return true; 8986 8987 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function 8988 wants to know about pass-by-reference semantics for incoming 8989 arguments. */ 8990 if (! cum) 8991 return false; 8992 8993 if (TARGET_SHCOMPACT) 8994 { 8995 cum->byref = shcompact_byref (cum, mode, type, named); 8996 return cum->byref != 0; 8997 } 8998 8999 return false; 9000} 9001 9002static bool 9003sh_callee_copies (cumulative_args_t cum, machine_mode mode, 9004 const_tree type, bool named ATTRIBUTE_UNUSED) 9005{ 9006 /* ??? How can it possibly be correct to return true only on the 9007 caller side of the equation? Is there someplace else in the 9008 sh backend that's magically producing the copies? */ 9009 return (get_cumulative_args (cum)->outgoing 9010 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) 9011 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0)); 9012} 9013 9014/* Round a register number up to a proper boundary for an arg of mode 9015 MODE. 9016 The SH doesn't care about double alignment, so we only 9017 round doubles to even regs when asked to explicitly. */ 9018static int 9019sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode) 9020{ 9021 /* FIXME: This used to be a macro and has been copy pasted into this 9022 function as is. Make this more readable. 
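   The effect is that a double-width FP argument which must start on an
   even register pair (e.g. DFmode on SH4) rounds an odd argument-register
   count up: a count of 3 becomes 4, and on the Renesas ABI the skipped
   SFmode register is remembered in free_single_fp_reg for a later SFmode
   argument.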
*/
  return
    (((TARGET_ALIGN_DOUBLE
       || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
	   && (mode == DFmode || mode == DCmode)
	   && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
      && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
     ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
	+ (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
     : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
}

/* Return true if an arg of the specified mode should be passed in a
   register, false otherwise.  */
static bool
sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
		  const_tree type)
{
  /* FIXME: This used to be a macro and has been copy pasted into this
     function as is.  Make this more readable.  */
  return
    ((type == 0
      || (! TREE_ADDRESSABLE (type)
	  && (! (TARGET_HITACHI || cum.renesas_abi)
	      || ! (AGGREGATE_TYPE_P (type)
		    || (!TARGET_FPU_ANY
			&& (GET_MODE_CLASS (mode) == MODE_FLOAT
			    && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
     && ! cum.force_mem
     && (TARGET_SH2E
	 ? ((mode) == BLKmode
	    ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
		+ int_size_in_bytes (type))
	       <= NPARM_REGS (SImode) * UNITS_PER_WORD)
	    : ((sh_round_reg (cum, mode)
		+ HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
	       <= NPARM_REGS (mode)))
	 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
}

static int
sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int words = 0;

  if (!TARGET_SH5
      && sh_pass_in_reg_p (*cum, mode, type)
      && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
      && (sh_round_reg (*cum, mode)
	  + (mode != BLKmode
	     ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
	     : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
	  > NPARM_REGS (mode)))
    words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);

  else if (!TARGET_SHCOMPACT
	   && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
    words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];

  return words * UNITS_PER_WORD;
}


/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On SH the first args are normally in registers
   and the rest are pushed.  Any arg that starts within the first
   NPARM_REGS words is at least partially passed in a register unless
   its data type forbids.  */
static rtx
sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
		 const_tree type, bool named)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);

  if (! TARGET_SH5 && mode == VOIDmode)
    return GEN_INT (ca->renesas_abi ? 1 : 0);

  if (! TARGET_SH5
      && sh_pass_in_reg_p (*ca, mode, type)
      && (named || !
(TARGET_HITACHI || ca->renesas_abi))) 9116 { 9117 int regno; 9118 9119 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN 9120 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1))) 9121 { 9122 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode, 9123 gen_rtx_REG (SFmode, 9124 BASE_ARG_REG (mode) 9125 + (sh_round_reg (*ca, mode) ^ 1)), 9126 const0_rtx); 9127 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode, 9128 gen_rtx_REG (SFmode, 9129 BASE_ARG_REG (mode) 9130 + ((sh_round_reg (*ca, mode) + 1) ^ 1)), 9131 GEN_INT (4)); 9132 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2)); 9133 } 9134 9135 /* If the alignment of a DF value causes an SF register to be 9136 skipped, we will use that skipped register for the next SF 9137 value. */ 9138 if ((TARGET_HITACHI || ca->renesas_abi) 9139 && ca->free_single_fp_reg 9140 && mode == SFmode) 9141 return gen_rtx_REG (mode, ca->free_single_fp_reg); 9142 9143 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode)) 9144 ^ (mode == SFmode && TARGET_SH4 9145 && TARGET_LITTLE_ENDIAN 9146 && ! TARGET_HITACHI && ! ca->renesas_abi); 9147 return gen_rtx_REG (mode, regno); 9148 9149 } 9150 9151 if (TARGET_SH5) 9152 { 9153 if (mode == VOIDmode && TARGET_SHCOMPACT) 9154 return GEN_INT (ca->call_cookie); 9155 9156 /* The following test assumes unnamed arguments are promoted to 9157 DFmode. */ 9158 if (mode == SFmode && ca->free_single_fp_reg) 9159 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg); 9160 9161 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT) 9162 && (named || ! ca->prototype_p) 9163 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode)) 9164 { 9165 if (! ca->prototype_p && TARGET_SHMEDIA) 9166 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode); 9167 9168 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, 9169 FIRST_FP_PARM_REG 9170 + ca->arg_count[(int) SH_ARG_FLOAT]); 9171 } 9172 9173 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) 9174 && (! TARGET_SHCOMPACT 9175 || (! SHCOMPACT_FORCE_ON_STACK (mode, type) 9176 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode, 9177 type, named)))) 9178 { 9179 return gen_rtx_REG (mode, (FIRST_PARM_REG 9180 + ca->arg_count[(int) SH_ARG_INT])); 9181 } 9182 9183 return NULL_RTX; 9184 } 9185 9186 return NULL_RTX; 9187} 9188 9189/* Update the data in CUM to advance over an argument 9190 of mode MODE and data type TYPE. 9191 (TYPE is null for libcalls where that information may not be 9192 available.) */ 9193static void 9194sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode, 9195 const_tree type, bool named) 9196{ 9197 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); 9198 9199 if (ca->force_mem) 9200 ca->force_mem = 0; 9201 else if (TARGET_SH5) 9202 { 9203 const_tree type2 = (ca->byref && type 9204 ? TREE_TYPE (type) 9205 : type); 9206 machine_mode mode2 = (ca->byref && type 9207 ? TYPE_MODE (type2) 9208 : mode); 9209 int dwords = ((ca->byref 9210 ? ca->byref 9211 : mode2 == BLKmode 9212 ? int_size_in_bytes (type2) 9213 : GET_MODE_SIZE (mode2)) + 7) / 8; 9214 int numregs = MIN (dwords, NPARM_REGS (SImode) 9215 - ca->arg_count[(int) SH_ARG_INT]); 9216 9217 if (numregs) 9218 { 9219 ca->arg_count[(int) SH_ARG_INT] += numregs; 9220 if (TARGET_SHCOMPACT 9221 && SHCOMPACT_FORCE_ON_STACK (mode2, type2)) 9222 { 9223 ca->call_cookie 9224 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT] 9225 - numregs, 1); 9226 /* N.B. We want this also for outgoing. */ 9227 ca->stack_regs += numregs; 9228 } 9229 else if (ca->byref) 9230 { 9231 if (! 
ca->outgoing)
                ca->stack_regs += numregs;
              ca->byref_regs += numregs;
              ca->byref = 0;
              do
                ca->call_cookie
                  |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
                                          - numregs, 2);
              while (--numregs);
              ca->call_cookie
                |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
                                        - 1, 1);
            }
          else if (dwords > numregs)
            {
              int pushregs = numregs;

              if (TARGET_SHCOMPACT)
                ca->stack_regs += numregs;
              while (pushregs < NPARM_REGS (SImode) - 1
                     && (CALL_COOKIE_INT_REG_GET
                         (ca->call_cookie,
                          NPARM_REGS (SImode) - pushregs)
                         == 1))
                {
                  ca->call_cookie
                    &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
                                              - pushregs, 1);
                  pushregs++;
                }
              if (numregs == NPARM_REGS (SImode))
                ca->call_cookie
                  |= CALL_COOKIE_INT_REG (0, 1)
                  | CALL_COOKIE_STACKSEQ (numregs - 1);
              else
                ca->call_cookie
                  |= CALL_COOKIE_STACKSEQ (numregs);
            }
        }
      if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
          && (named || ! ca->prototype_p))
        {
          if (mode2 == SFmode && ca->free_single_fp_reg)
            ca->free_single_fp_reg = 0;
          else if (ca->arg_count[(int) SH_ARG_FLOAT]
                   < NPARM_REGS (SFmode))
            {
              int numfpregs
                = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
                       NPARM_REGS (SFmode)
                       - ca->arg_count[(int) SH_ARG_FLOAT]);

              ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;

              if (TARGET_SHCOMPACT && ! ca->prototype_p)
                {
                  if (ca->outgoing && numregs > 0)
                    do
                      {
                        ca->call_cookie
                          |= (CALL_COOKIE_INT_REG
                              (ca->arg_count[(int) SH_ARG_INT]
                               - numregs + ((numfpregs - 2) / 2),
                               4 + (ca->arg_count[(int) SH_ARG_FLOAT]
                                    - numfpregs) / 2));
                      }
                    while (numfpregs -= 2);
                }
              else if (mode2 == SFmode && (named)
                       && (ca->arg_count[(int) SH_ARG_FLOAT]
                           < NPARM_REGS (SFmode)))
                ca->free_single_fp_reg
                  = FIRST_FP_PARM_REG - numfpregs
                  + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
            }
        }
      return;
    }

  if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
    {
      /* Note that we've used the skipped register.  */
      if (mode == SFmode && ca->free_single_fp_reg)
        {
          ca->free_single_fp_reg = 0;
          return;
        }
      /* When we have a DF after an SF, there's an SF register that gets
         skipped in order to align the DF value.  We note this skipped
         register, because the next SF value will use it, and not the
         SF that follows the DF.  */
      if (mode == DFmode
          && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
        {
          ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
                                    + BASE_ARG_REG (mode));
        }
    }

  if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
      || sh_pass_in_reg_p (*ca, mode, type))
    (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
     = (sh_round_reg (*ca, mode)
        + (mode == BLKmode
           ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
           : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
}

/* The Renesas calling convention doesn't quite fit into this scheme since
   the address is passed like an invisible argument, but one that is always
   passed in memory.  */
static rtx
sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, 2);
}

/* Worker function for TARGET_FUNCTION_VALUE.

   For the SH, this is like LIBCALL_VALUE, except that we must change the
   mode like PROMOTE_MODE does.
   ??? PROMOTE_MODE is ignored for non-scalar types.  The set of types
   tested here has to be kept in sync with the one in
   explow.c:promote_mode.  */
static rtx
sh_function_value (const_tree valtype,
                   const_tree fn_decl_or_type,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    fn_decl_or_type = NULL;

  return gen_rtx_REG (
           ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
             && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
             && (TREE_CODE (valtype) == INTEGER_TYPE
                 || TREE_CODE (valtype) == ENUMERAL_TYPE
                 || TREE_CODE (valtype) == BOOLEAN_TYPE
                 || TREE_CODE (valtype) == REAL_TYPE
                 || TREE_CODE (valtype) == OFFSET_TYPE))
            && sh_promote_prototypes (fn_decl_or_type)
            ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
           BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
}

/* Worker function for TARGET_LIBCALL_VALUE.  */
static rtx
sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
}

/* Return true if N is a possible register number of function value.  */
static bool
sh_function_value_regno_p (const unsigned int regno)
{
  return ((regno) == FIRST_RET_REG
          || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
          || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */
static bool
sh_return_in_memory (const_tree type, const_tree fndecl)
{
  if (TARGET_SH5)
    {
      if (TYPE_MODE (type) == BLKmode)
        return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
      else
        return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
    }
  else
    {
      return (TYPE_MODE (type) == BLKmode
              || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
                  && TREE_CODE (type) == RECORD_TYPE));
    }
}

/* We actually emit the code in sh_expand_prologue.  We used to use
   a static variable to flag that we need to emit this code, but that
   doesn't work when inlining, when functions are deferred and then
   emitted later.  Fortunately, we already have two flags that are part
   of struct function that tell if a function uses varargs or stdarg.  */
static void
sh_setup_incoming_varargs (cumulative_args_t ca,
                           machine_mode mode,
                           tree type,
                           int *pretend_arg_size,
                           int second_time ATTRIBUTE_UNUSED)
{
  gcc_assert (cfun->stdarg);
  if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
    {
      int named_parm_regs, anon_parm_regs;

      named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
                         + (mode == BLKmode
                            ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
                            : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
      anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
      if (anon_parm_regs > 0)
        *pretend_arg_size = anon_parm_regs * 4;
    }
}

static bool
sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
{
  return TARGET_SH5;
}

static bool
sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);

  return ! (TARGET_HITACHI || ca->renesas_abi) && !
TARGET_SH5;
}


/* Define the offset between two registers, one to be eliminated, and
   the other its replacement, at the start of a routine.  */
int
initial_elimination_offset (int from, int to)
{
  int regs_saved;
  int regs_saved_rounding = 0;
  int total_saved_regs_space;
  int total_auto_space;
  int save_flags = target_flags;
  int copy_flags;
  HARD_REG_SET live_regs_mask;

  shmedia_space_reserved_for_target_registers = false;
  regs_saved = calc_live_regs (&live_regs_mask);
  regs_saved += SHMEDIA_REGS_STACK_ADJUST ();

  if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
    {
      shmedia_space_reserved_for_target_registers = true;
      regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
    }

  if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
    regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
                           - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));

  total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
  copy_flags = target_flags;
  target_flags = save_flags;

  total_saved_regs_space = regs_saved + regs_saved_rounding;

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space
           + crtl->args.info.byref_regs * 8;

  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space
           + crtl->args.info.byref_regs * 8;

  /* Initial gap between fp and sp is 0.  */
  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return 0;

  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return rounded_frame_size (0);

  if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return rounded_frame_size (0);

  gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
              && (to == HARD_FRAME_POINTER_REGNUM
                  || to == STACK_POINTER_REGNUM));
  if (TARGET_SH5)
    {
      int n = total_saved_regs_space;
      int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
      save_schedule schedule;
      save_entry *entry;

      n += total_auto_space;

      /* If it wasn't saved, there's not much we can do.  */
      if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
        return n;

      target_flags = copy_flags;

      sh5_schedule_saves (&live_regs_mask, &schedule, n);
      for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
        if (entry->reg == pr_reg)
          {
            target_flags = save_flags;
            return entry->offset;
          }
      gcc_unreachable ();
    }
  else
    return total_auto_space;
}

/* Parse the -mfixed-range= option string.  */
void
sh_fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.
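     E.g. -mfixed-range=r4-r6 (an illustrative invocation) marks r4, r5
     and r6 as both fixed and call-used, so the register allocator will
     never hand them out.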
*/ 9549 9550 i = strlen (const_str); 9551 str = (char *) alloca (i + 1); 9552 memcpy (str, const_str, i + 1); 9553 9554 while (1) 9555 { 9556 dash = strchr (str, '-'); 9557 if (!dash) 9558 { 9559 warning (0, "value of -mfixed-range must have form REG1-REG2"); 9560 return; 9561 } 9562 *dash = '\0'; 9563 comma = strchr (dash + 1, ','); 9564 if (comma) 9565 *comma = '\0'; 9566 9567 first = decode_reg_name (str); 9568 if (first < 0) 9569 { 9570 warning (0, "unknown register name: %s", str); 9571 return; 9572 } 9573 9574 last = decode_reg_name (dash + 1); 9575 if (last < 0) 9576 { 9577 warning (0, "unknown register name: %s", dash + 1); 9578 return; 9579 } 9580 9581 *dash = '-'; 9582 9583 if (first > last) 9584 { 9585 warning (0, "%s-%s is an empty range", str, dash + 1); 9586 return; 9587 } 9588 9589 for (i = first; i <= last; ++i) 9590 fixed_regs[i] = call_used_regs[i] = 1; 9591 9592 if (!comma) 9593 break; 9594 9595 *comma = ','; 9596 str = comma + 1; 9597 } 9598} 9599 9600/* Insert any deferred function attributes from earlier pragmas. */ 9601static void 9602sh_insert_attributes (tree node, tree *attributes) 9603{ 9604 tree attrs; 9605 9606 if (TREE_CODE (node) != FUNCTION_DECL) 9607 return; 9608 9609 /* We are only interested in fields. */ 9610 if (!DECL_P (node)) 9611 return; 9612 9613 /* Append the attributes to the deferred attributes. */ 9614 *sh_deferred_function_attributes_tail = *attributes; 9615 attrs = sh_deferred_function_attributes; 9616 if (!attrs) 9617 return; 9618 9619 /* Some attributes imply or require the interrupt attribute. */ 9620 if (!lookup_attribute ("interrupt_handler", attrs) 9621 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node))) 9622 { 9623 /* If we have a trapa_handler, but no interrupt_handler attribute, 9624 insert an interrupt_handler attribute. */ 9625 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE) 9626 /* We can't use sh_pr_interrupt here because that's not in the 9627 java frontend. */ 9628 attrs 9629 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs); 9630 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank, 9631 if the interrupt attribute is missing, we ignore the attribute 9632 and warn. */ 9633 else if (lookup_attribute ("sp_switch", attrs) 9634 || lookup_attribute ("trap_exit", attrs) 9635 || lookup_attribute ("nosave_low_regs", attrs) 9636 || lookup_attribute ("resbank", attrs)) 9637 { 9638 tree *tail; 9639 9640 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs)) 9641 { 9642 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs)) 9643 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs)) 9644 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)) 9645 || is_attribute_p ("resbank", TREE_PURPOSE (attrs))) 9646 warning (OPT_Wattributes, 9647 "%qE attribute only applies to interrupt functions", 9648 TREE_PURPOSE (attrs)); 9649 else 9650 { 9651 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE, 9652 NULL_TREE); 9653 tail = &TREE_CHAIN (*tail); 9654 } 9655 } 9656 attrs = *attributes; 9657 } 9658 } 9659 9660 /* Install the processed list. */ 9661 *attributes = attrs; 9662 9663 /* Clear deferred attributes. 
*/
  sh_deferred_function_attributes = NULL_TREE;
  sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

  return;
}

/*------------------------------------------------------------------------------
  Target specific attributes
  Supported attributes are:

  * interrupt_handler
    Specifies this function is an interrupt handler.

  * trapa_handler
    Like interrupt_handler, but don't save all registers.

  * sp_switch
    Specifies an alternate stack for an interrupt handler to run on.

  * trap_exit
    Use a trapa to exit an interrupt function instead of rte.

  * nosave_low_regs
    Don't save r0..r7 in an interrupt handler function.
    This is useful on SH3* and SH4*, which have a separate set of low
    regs for user and privileged modes.
    This is mainly to be used for non-reentrant interrupt handlers (i.e.
    those that run with interrupts disabled and thus can't be
    interrupted themselves).

  * renesas
    Use Renesas calling/layout conventions (functions and structures).

  * resbank
    In case of an interrupt handler function, use a register bank to
    save registers R0-R14, MACH, MACL, GBR and PR.
    This is available only on SH2A targets.

  * function_vector
    Declares a function to be called using the TBR relative addressing
    mode.  Takes an argument that specifies the slot number in the table
    where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
*/

/* Handle a 'resbank' attribute.  */
static tree
sh_handle_resbank_handler_attribute (tree * node, tree name,
                                     tree args ATTRIBUTE_UNUSED,
                                     int flags ATTRIBUTE_UNUSED,
                                     bool * no_add_attrs)
{
  if (!TARGET_SH2A)
    {
      warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
               name);
      *no_add_attrs = true;
    }
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt_handler" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
sh_handle_interrupt_handler_attribute (tree *node, tree name,
                                       tree args ATTRIBUTE_UNUSED,
                                       int flags ATTRIBUTE_UNUSED,
                                       bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  else if (TARGET_SHCOMPACT)
    {
      error ("attribute interrupt_handler is not compatible with -m5-compact");
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a 'function_vector' attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
                                               tree args ATTRIBUTE_UNUSED,
                                               int flags ATTRIBUTE_UNUSED,
                                               bool * no_add_attrs)
{
  if (!TARGET_SH2A)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
               name);
      *no_add_attrs = true;
    }
  else if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
    {
      /* The argument must be a constant integer.
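         A typical accepted form (an illustrative declaration) would be
             void isr (void) __attribute__ ((function_vector (42)));
         anything whose argument does not fold to an INTEGER_CST lands
         in this branch and is rejected with the warning below.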
*/
      warning (OPT_Wattributes,
               "%qE attribute argument not an integer constant",
               name);
      *no_add_attrs = true;
    }
  else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
    {
      /* The argument value must be between 0 and 255.  */
      warning (OPT_Wattributes,
               "%qE attribute argument should be between 0 and 255",
               name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

/* Returns true if X is a SYMBOL_REF for a function that has been
   assigned the attribute 'function_vector'.  */
bool
sh2a_is_function_vector_call (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
    {
      tree tr = SYMBOL_REF_DECL (x);

      if (sh2a_function_vector_p (tr))
        return true;
    }

  return false;
}

/* Returns the function vector number, if the attribute
   'function_vector' is assigned, otherwise returns zero.  */
int
sh2a_get_function_vector_number (rtx x)
{
  int num;
  tree list, t;

  if ((GET_CODE (x) == SYMBOL_REF)
      && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
    {
      t = SYMBOL_REF_DECL (x);

      if (TREE_CODE (t) != FUNCTION_DECL)
        return 0;

      list = SH_ATTRIBUTES (t);
      while (list)
        {
          if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
            {
              num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
              return num;
            }

          list = TREE_CHAIN (list);
        }

      return 0;
    }
  else
    return 0;
}

/* Handle an "sp_switch" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
    {
      /* The argument must be a constant string.  */
      warning (OPT_Wattributes, "%qE attribute argument not a string constant",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "trap_exit" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  /* The argument specifies a trap number to be used in a trapa instruction
     at function exit (instead of an rte instruction).  */
  else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
    {
      /* The argument must be a constant integer.  */
      warning (OPT_Wattributes, "%qE attribute argument not an "
               "integer constant", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static tree
sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
                             tree name ATTRIBUTE_UNUSED,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs ATTRIBUTE_UNUSED)
{
  return NULL_TREE;
}

/* True if __attribute__((renesas)) or -mrenesas.
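   E.g. a function declared as (illustrative)
       int f (int) __attribute__ ((renesas));
   and a plain function compiled with -mrenesas both satisfy this
   predicate.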
*/ 9903bool 9904sh_attr_renesas_p (const_tree td) 9905{ 9906 if (TARGET_HITACHI) 9907 return true; 9908 if (td == NULL_TREE) 9909 return false; 9910 if (DECL_P (td)) 9911 td = TREE_TYPE (td); 9912 if (td == error_mark_node) 9913 return false; 9914 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) 9915 != NULL_TREE); 9916} 9917 9918/* True if __attribute__((renesas)) or -mrenesas, for the current 9919 function. */ 9920bool 9921sh_cfun_attr_renesas_p (void) 9922{ 9923 return sh_attr_renesas_p (current_function_decl); 9924} 9925 9926/* Returns true if the current function has the "interrupt_handler" 9927 attribute set. */ 9928bool 9929sh_cfun_interrupt_handler_p (void) 9930{ 9931 return (lookup_attribute ("interrupt_handler", 9932 DECL_ATTRIBUTES (current_function_decl)) 9933 != NULL_TREE); 9934} 9935 9936/* Returns true if FUNC has been assigned the attribute 9937 "function_vector". */ 9938bool 9939sh2a_function_vector_p (tree func) 9940{ 9941 tree list; 9942 if (TREE_CODE (func) != FUNCTION_DECL) 9943 return false; 9944 9945 list = SH_ATTRIBUTES (func); 9946 while (list) 9947 { 9948 if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) 9949 return true; 9950 9951 list = TREE_CHAIN (list); 9952 } 9953 return false; 9954} 9955 9956/* Returns true if given tree has the "resbank" attribute set. */ 9957bool 9958sh_cfun_resbank_handler_p (void) 9959{ 9960 return ((lookup_attribute ("resbank", 9961 DECL_ATTRIBUTES (current_function_decl)) 9962 != NULL_TREE) 9963 && (lookup_attribute ("interrupt_handler", 9964 DECL_ATTRIBUTES (current_function_decl)) 9965 != NULL_TREE) && TARGET_SH2A); 9966} 9967 9968/* Returns true if the current function has a "trap_exit" attribute set. */ 9969bool 9970sh_cfun_trap_exit_p (void) 9971{ 9972 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl)) 9973 != NULL_TREE; 9974} 9975 9976/* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */ 9977static const char * 9978sh_check_pch_target_flags (int old_flags) 9979{ 9980 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3 9981 | MASK_SH_E | MASK_HARD_SH4 9982 | MASK_FPU_SINGLE | MASK_SH4)) 9983 return _("created and used with different architectures / ABIs"); 9984 if ((old_flags ^ target_flags) & MASK_HITACHI) 9985 return _("created and used with different ABIs"); 9986 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN) 9987 return _("created and used with different endianness"); 9988 return NULL; 9989} 9990 9991/* Predicates used by the templates. */ 9992 9993/* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx. 9994 Used only in general_movsrc_operand. */ 9995bool 9996system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) 9997{ 9998 switch (REGNO (op)) 9999 { 10000 case PR_REG: 10001 case MACL_REG: 10002 case MACH_REG: 10003 return true; 10004 } 10005 return false; 10006} 10007 10008/* Returns true if OP is a floating point value with value 0.0. */ 10009bool 10010fp_zero_operand (rtx op) 10011{ 10012 REAL_VALUE_TYPE r; 10013 10014 if (GET_MODE (op) != SFmode) 10015 return false; 10016 10017 REAL_VALUE_FROM_CONST_DOUBLE (r, op); 10018 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); 10019} 10020 10021/* Returns true if OP is a floating point value with value 1.0. 
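   Like 0.0 with fldi0, a 1.0 constant can be materialized directly by
   the fldi1 instruction on SH2E and later instead of being loaded from
   the constant pool, which is presumably why these two values get their
   own predicates.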
*/ 10022bool 10023fp_one_operand (rtx op) 10024{ 10025 REAL_VALUE_TYPE r; 10026 10027 if (GET_MODE (op) != SFmode) 10028 return false; 10029 10030 REAL_VALUE_FROM_CONST_DOUBLE (r, op); 10031 return REAL_VALUES_EQUAL (r, dconst1); 10032} 10033 10034/* Return the TLS type for TLS symbols. */ 10035enum tls_model 10036tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) 10037{ 10038 if (GET_CODE (op) != SYMBOL_REF) 10039 return TLS_MODEL_NONE; 10040 return SYMBOL_REF_TLS_MODEL (op); 10041} 10042 10043/* Return the destination address of a branch. */ 10044static int 10045branch_dest (rtx branch) 10046{ 10047 rtx dest = SET_SRC (PATTERN (branch)); 10048 int dest_uid; 10049 10050 if (GET_CODE (dest) == IF_THEN_ELSE) 10051 dest = XEXP (dest, 1); 10052 dest = XEXP (dest, 0); 10053 dest_uid = INSN_UID (dest); 10054 return INSN_ADDRESSES (dest_uid); 10055} 10056 10057/* Return nonzero if REG is not used after INSN. 10058 We assume REG is a reload reg, and therefore does 10059 not live past labels. It may live past calls or jumps though. */ 10060bool 10061reg_unused_after (rtx reg, rtx_insn *insn) 10062{ 10063 enum rtx_code code; 10064 rtx set; 10065 10066 /* If the reg is set by this instruction, then it is safe for our 10067 case. Disregard the case where this is a store to memory, since 10068 we are checking a register used in the store address. */ 10069 set = single_set (insn); 10070 if (set && !MEM_P (SET_DEST (set)) 10071 && reg_overlap_mentioned_p (reg, SET_DEST (set))) 10072 return true; 10073 10074 while ((insn = NEXT_INSN (insn))) 10075 { 10076 rtx set; 10077 if (!INSN_P (insn)) 10078 continue; 10079 10080 code = GET_CODE (insn); 10081 10082#if 0 10083 /* If this is a label that existed before reload, then the register 10084 is dead here. However, if this is a label added by reorg, then 10085 the register may still be live here. We can't tell the difference, 10086 so we just ignore labels completely. */ 10087 if (code == CODE_LABEL) 10088 return 1; 10089 /* else */ 10090#endif 10091 10092 if (code == JUMP_INSN) 10093 return false; 10094 10095 /* If this is a sequence, we must handle them all at once. 10096 We could have for instance a call that sets the target register, 10097 and an insn in a delay slot that uses the register. In this case, 10098 we must return 0. 
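   A sketch of that situation (pseudo RTL, illustrative only):
       (sequence [(call_insn ...)        ;; sets REG as its result
                  (insn ... (use REG))]) ;; delay-slot insn reads REG
   Both elements live in one SEQUENCE rtx, so each of them has to be
   inspected before REG can be declared unused.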
*/
      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
        {
          rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
          int i;
          int retval = 0;

          for (i = 0; i < seq->len (); i++)
            {
              rtx_insn *this_insn = seq->insn (i);
              rtx set = single_set (this_insn);

              if (CALL_P (this_insn))
                code = CALL_INSN;
              else if (JUMP_P (this_insn))
                {
                  if (INSN_ANNULLED_BRANCH_P (this_insn))
                    return false;
                  code = JUMP_INSN;
                }

              if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
                return false;
              if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
                {
                  if (!MEM_P (SET_DEST (set)))
                    retval = true;
                  else
                    return false;
                }
              if (set == NULL_RTX
                  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
                return false;
            }
          if (retval == 1)
            return true;
          else if (code == JUMP_INSN)
            return false;
        }

      set = single_set (insn);
      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
        return false;
      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
        return !MEM_P (SET_DEST (set));
      if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
        return false;

      if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
        return true;
    }
  return true;
}


static GTY(()) rtx t_reg_rtx;
rtx
get_t_reg_rtx (void)
{
  if (! t_reg_rtx)
    t_reg_rtx = gen_rtx_REG (SImode, T_REG);
  return t_reg_rtx;
}

static GTY(()) tree fpscr_values;

static void
emit_fpu_switch (rtx scratch, int index)
{
  rtx src;

  if (fpscr_values == NULL)
    {
      tree t;

      t = build_index_type (integer_one_node);
      t = build_array_type (integer_type_node, t);
      t = build_decl (BUILTINS_LOCATION,
                      VAR_DECL, get_identifier ("__fpscr_values"), t);
      DECL_ARTIFICIAL (t) = 1;
      DECL_IGNORED_P (t) = 1;
      DECL_EXTERNAL (t) = 1;
      TREE_STATIC (t) = 1;
      TREE_PUBLIC (t) = 1;
      TREE_USED (t) = 1;

      fpscr_values = t;
    }

  src = DECL_RTL (fpscr_values);
  if (!can_create_pseudo_p ())
    {
      emit_move_insn (scratch, XEXP (src, 0));
      if (index != 0)
        emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
      src = adjust_automodify_address (src, SImode, scratch, index * 4);
    }
  else
    src = adjust_address (src, SImode, index * 4);

  emit_insn (gen_lds_fpscr (src));
}

static rtx get_free_reg (HARD_REG_SET);

/* This function returns a register to use to load the address to load
   the fpscr from.  Currently it always returns r1 or r7, but when we are
   able to use pseudo registers after combine, or have a better mechanism
   for choosing a register, it should be done here.  */
/* REGS_LIVE is the liveness information for the point for which we
   need this allocation.  In some bare-bones exit blocks, r1 is live at the
   start.  We can even have all of r0..r3 being live:
__complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
   The insn before which new insns are placed will clobber the register
   we return.
   If a basic block consists only of setting the return value
   register to a pseudo and using that register, the return value is not
   live before or after this block, yet we'll insert our insns right in
   the middle.  */
static rtx
get_free_reg (HARD_REG_SET regs_live)
{
  if (! TEST_HARD_REG_BIT (regs_live, 1))
    return gen_rtx_REG (Pmode, 1);

  /* Hard reg 1 is live; since this is a small register classes target,
     there shouldn't be anything but a jump before the function end.  */
  gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
  return gen_rtx_REG (Pmode, 7);
}

/* This function will set the fpscr from memory.
   MODE is the mode we are setting it to.  */
void
fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
{
  enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
  enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
  rtx addr_reg;

  addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
  emit_fpu_switch (addr_reg, fp_mode == norm_mode);
}

/* Is the given character a logical line separator for the assembler?  */
#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
#endif

static bool
sequence_insn_p (rtx_insn *insn)
{
  rtx_insn *prev, *next;

  prev = PREV_INSN (insn);
  if (prev == NULL)
    return false;

  next = NEXT_INSN (prev);
  if (next == NULL)
    return false;

  return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
}

int
sh_insn_length_adjustment (rtx_insn *insn)
{
  /* Instructions with unfilled delay slots take up an extra two bytes for
     the nop in the delay slot.  */
  if (((NONJUMP_INSN_P (insn)
        && GET_CODE (PATTERN (insn)) != USE
        && GET_CODE (PATTERN (insn)) != CLOBBER)
       || CALL_P (insn) || JUMP_P (insn))
      && ! sequence_insn_p (insn)
      && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
    return 2;

  /* Increase the insn length of a cbranch without a delay slot insn to
     force a delay slot which will be stuffed with a nop.  */
  if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
      && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
      && ! sequence_insn_p (insn))
    return 2;

  /* sh-dsp parallel processing insns take four bytes instead of two.  */

  if (NONJUMP_INSN_P (insn))
    {
      int sum = 0;
      rtx body = PATTERN (insn);
      const char *templ;
      char c;
      bool maybe_label = true;

      if (GET_CODE (body) == ASM_INPUT)
        templ = XSTR (body, 0);
      else if (asm_noperands (body) >= 0)
        templ
          = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
      else
        return 0;
      do
        {
          int ppi_adjust = 0;

          do
            c = *templ++;
          while (c == ' ' || c == '\t');
          /* All sh-dsp parallel-processing insns start with p.
             The only non-ppi sh insn starting with p is pref.
             The only ppi starting with pr is prnd.  */
          if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
            ppi_adjust = 2;
          /* The repeat pseudo-insn expands to three insns, a total of
             six bytes in size.  */
          else if ((c == 'r' || c == 'R')
                   && ! strncasecmp ("epeat", templ, 5))
            ppi_adjust = 4;
          while (c && c != '\n'
                 && !
IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ)) 10318 { 10319 /* If this is a label, it is obviously not a ppi insn. */ 10320 if (c == ':' && maybe_label) 10321 { 10322 ppi_adjust = 0; 10323 break; 10324 } 10325 else if (c == '\'' || c == '"') 10326 maybe_label = false; 10327 c = *templ++; 10328 } 10329 sum += ppi_adjust; 10330 maybe_label = c != ':'; 10331 } 10332 while (c); 10333 return sum; 10334 } 10335 return 0; 10336} 10337 10338/* Return TRUE for a valid displacement for the REG+disp addressing 10339 with MODE. */ 10340bool 10341sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a, 10342 bool allow_zero) 10343{ 10344 if (! CONST_INT_P (op)) 10345 return false; 10346 10347 if (TARGET_SHMEDIA) 10348 { 10349 int size; 10350 10351 /* Check if this is the address of an unaligned load / store. */ 10352 if (mode == VOIDmode) 10353 return satisfies_constraint_I06 (op); 10354 10355 size = GET_MODE_SIZE (mode); 10356 return (!(INTVAL (op) & (size - 1)) 10357 && INTVAL (op) >= -512 * size 10358 && INTVAL (op) < 512 * size); 10359 } 10360 else 10361 { 10362 const HOST_WIDE_INT offset = INTVAL (op); 10363 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a); 10364 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a); 10365 10366 /* If the mode does not support any displacement always return false. 10367 Even though an index of '0' is actually always valid, it will cause 10368 troubles when e.g. a DFmode move is split into two SFmode moves, 10369 where one SFmode move will have index '0' and the other move will 10370 have index '4'. */ 10371 if (!allow_zero && max_disp < 1) 10372 return false; 10373 10374 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0; 10375 } 10376} 10377 10378/* Recognize an RTL expression that is a valid memory address for 10379 an instruction. 10380 The MODE argument is the machine mode for the MEM expression 10381 that wants to use this address. 10382 Allow REG 10383 REG+disp 10384 REG+r0 10385 REG++ 10386 --REG 10387 GBR 10388 GBR+disp */ 10389static bool 10390sh_legitimate_address_p (machine_mode mode, rtx x, bool strict) 10391{ 10392 if (! ALLOW_INDEXED_ADDRESS 10393 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) 10394 return false; 10395 10396 if (REG_P (x) && REGNO (x) == GBR_REG) 10397 return true; 10398 10399 if (MAYBE_BASE_REGISTER_RTX_P (x, strict)) 10400 return true; 10401 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) 10402 && ! 
TARGET_SHMEDIA 10403 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict)) 10404 return true; 10405 else if (GET_CODE (x) == PLUS) 10406 { 10407 rtx xop0 = XEXP (x, 0); 10408 rtx xop1 = XEXP (x, 1); 10409 10410 if (REG_P (xop0) && REGNO (xop0) == GBR_REG) 10411 return gbr_displacement (xop1, mode); 10412 10413 if (GET_MODE_SIZE (mode) <= 8 10414 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict) 10415 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false)) 10416 return true; 10417 10418 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode 10419 || ((xop0 == stack_pointer_rtx 10420 || xop0 == hard_frame_pointer_rtx) 10421 && REG_P (xop1) && REGNO (xop1) == R0_REG) 10422 || ((xop1 == stack_pointer_rtx 10423 || xop1 == hard_frame_pointer_rtx) 10424 && REG_P (xop0) && REGNO (xop0) == R0_REG)) 10425 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4) 10426 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8) 10427 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) 10428 && TARGET_FMOVD && mode == DFmode))) 10429 { 10430 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict) 10431 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict)) 10432 return true; 10433 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict) 10434 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)) 10435 return true; 10436 } 10437 } 10438 10439 return false; 10440} 10441 10442/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol 10443 isn't protected by a PIC unspec. */ 10444bool 10445nonpic_symbol_mentioned_p (rtx x) 10446{ 10447 const char *fmt; 10448 int i; 10449 10450 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF 10451 || GET_CODE (x) == PC) 10452 return true; 10453 10454 /* We don't want to look into the possible MEM location of a 10455 CONST_DOUBLE, since we're not going to use it, in general. */ 10456 if (GET_CODE (x) == CONST_DOUBLE) 10457 return false; 10458 10459 if (GET_CODE (x) == UNSPEC 10460 && (XINT (x, 1) == UNSPEC_PIC 10461 || XINT (x, 1) == UNSPEC_GOT 10462 || XINT (x, 1) == UNSPEC_GOTOFF 10463 || XINT (x, 1) == UNSPEC_GOTPLT 10464 || XINT (x, 1) == UNSPEC_GOTTPOFF 10465 || XINT (x, 1) == UNSPEC_DTPOFF 10466 || XINT (x, 1) == UNSPEC_TPOFF 10467 || XINT (x, 1) == UNSPEC_PLT 10468 || XINT (x, 1) == UNSPEC_SYMOFF 10469 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF)) 10470 return false; 10471 10472 fmt = GET_RTX_FORMAT (GET_CODE (x)); 10473 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 10474 { 10475 if (fmt[i] == 'E') 10476 { 10477 int j; 10478 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 10479 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j))) 10480 return true; 10481 } 10482 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i))) 10483 return true; 10484 } 10485 10486 return false; 10487} 10488 10489/* Convert a non-PIC address in `orig' to a PIC address using @GOT or 10490 @GOTOFF in `reg'. 
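   Roughly: label refs and locally-binding symbols take the cheaper
   @GOTOFF form (a constant offset from the GOT base), while anything
   preemptible has its address loaded from its @GOT slot.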
*/
rtx
legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
                        rtx reg)
{
  if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
    return orig;

  if (GET_CODE (orig) == LABEL_REF
      || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
    {
      if (reg == NULL_RTX)
        reg = gen_reg_rtx (Pmode);

      emit_insn (gen_symGOTOFF2reg (reg, orig));
      return reg;
    }
  else if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == NULL_RTX)
        reg = gen_reg_rtx (Pmode);

      emit_insn (gen_symGOT2reg (reg, orig));
      return reg;
    }
  return orig;
}

/* Given a (logical) mode size and an offset in bytes, try to find the
   appropriate displacement value for a mov insn.  On SH the displacements
   are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
   15 bytes in QImode.  To compensate for this we create a new base address
   by adding an adjustment value to it.

   If the originally requested offset is greater than 127 we prefer using
   values 124..127 over 128..131 to increase opportunities to use the
   add #imm, Rn insn.

   In some cases it is possible that a requested offset might seem unaligned
   or inappropriate for the mode size, like offset = 2 and mode size = 4.
   This is compensated by adjusting the base address so that the effective
   address of the displacement move insn will be aligned.

   This is not the best possible way of rebasing the base address, as it
   does not look at other present displacement addressings around it.
   In some cases this can create more base address adjustments than would
   actually be necessary.  */
struct disp_adjust
{
  rtx offset_adjust;
  rtx mov_disp;
};

static struct disp_adjust
sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
{
  struct disp_adjust res = { NULL_RTX, NULL_RTX };

  /* Do not try to use SH2A's large displacements here, because this would
     effectively disable the small displacement insns.  */
  const int mode_sz = GET_MODE_SIZE (mode);
  const int mov_insn_sz = mov_insn_size (mode, false);
  const int max_disp = sh_max_mov_insn_displacement (mode, false);
  const int max_disp_next = max_disp + mov_insn_sz;
  HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
  HOST_WIDE_INT offset_adjust;

  /* In some cases this actually does happen and we must check for it.  */
  if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
    return res;

  /* Keeps the previous behavior for QImode displacement addressing.
     This just decides how the offset is re-based.  Removing this special
     case will result in slightly bigger code on average, but it's not that
     bad actually.  */
  if (mov_insn_sz == 1)
    align_modifier = 0;

  offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;

  if (mode_sz + offset - offset_adjust <= max_disp_next)
    {
      res.offset_adjust = GEN_INT (offset_adjust);
      res.mov_disp = GEN_INT (offset - offset_adjust);
    }

  return res;
}

/* Try to modify an illegitimate address and make it legitimate.
   If we find one, return the new, valid address.
   Otherwise, return the original address.
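   A worked case (illustrative numbers): an SImode access at base+68
   exceeds the 60-byte displacement limit, so sh_find_mov_disp_adjust
   above yields offset_adjust = 64 and mov_disp = 4, and the address is
   rebased as (base + 64) + 4.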
*/ 10582static rtx 10583sh_legitimize_address (rtx x, rtx oldx, machine_mode mode) 10584{ 10585 if (flag_pic) 10586 x = legitimize_pic_address (oldx, mode, NULL_RTX); 10587 10588 if (TARGET_SHMEDIA) 10589 return x; 10590 10591 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode) 10592 || (TARGET_SH2E && mode == SFmode)) 10593 return x; 10594 10595 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) 10596 && BASE_REGISTER_RTX_P (XEXP (x, 0))) 10597 { 10598 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, 10599 INTVAL (XEXP (x, 1))); 10600 10601 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) 10602 { 10603 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0), 10604 adj.offset_adjust, NULL_RTX, 0, 10605 OPTAB_LIB_WIDEN); 10606 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp); 10607 } 10608 } 10609 return x; 10610} 10611 10612/* Attempt to replace *p, which is an address that needs reloading, with 10613 a valid memory address for an operand of mode MODE. 10614 Like for sh_legitimize_address, for the SH we try to get a normal form 10615 of the address. That will allow inheritance of the address reloads. */ 10616bool 10617sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum, 10618 int itype) 10619{ 10620 enum reload_type type = (enum reload_type) itype; 10621 const int mode_sz = GET_MODE_SIZE (mode); 10622 10623 if (sh_lra_p ()) 10624 return false; 10625 10626 if (! ALLOW_INDEXED_ADDRESS 10627 && GET_CODE (*p) == PLUS 10628 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1))) 10629 { 10630 *p = copy_rtx (*p); 10631 push_reload (*p, NULL_RTX, p, NULL, 10632 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 10633 return true; 10634 } 10635 10636 if (! ALLOW_INDEXED_ADDRESS 10637 && GET_CODE (*p) == PLUS 10638 && GET_CODE (XEXP (*p, 0)) == PLUS) 10639 { 10640 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0), 10641 XEXP (XEXP (*p, 0), 1)); 10642 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1)); 10643 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL, 10644 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 10645 return true; 10646 } 10647 10648 if (TARGET_SHMEDIA) 10649 return false; 10650 10651 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1)) 10652 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true) 10653 && (ALLOW_INDEXED_ADDRESS 10654 || XEXP (*p, 0) == stack_pointer_rtx 10655 || XEXP (*p, 0) == hard_frame_pointer_rtx)) 10656 { 10657 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1)); 10658 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset); 10659 10660 if (TARGET_SH2A && mode == DFmode && (offset & 0x7)) 10661 { 10662 push_reload (*p, NULL_RTX, p, NULL, 10663 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 10664 return true; 10665 } 10666 10667 if (TARGET_SH2E && mode == SFmode) 10668 { 10669 *p = copy_rtx (*p); 10670 push_reload (*p, NULL_RTX, p, NULL, 10671 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); 10672 return true; 10673 } 10674 10675 /* FIXME: Do not allow to legitimize QImode and HImode displacement 10676 moves because then reload has a problem figuring the constraint 10677 that the move insn target/source reg must be R0. 10678 Or maybe some handling is wrong in sh_secondary_reload for this 10679 to work properly? */ 10680 if ((mode_sz == 4 || mode_sz == 8) 10681 && ! 
(TARGET_SH4 && mode == DFmode)
          && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
        {
          rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
          *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
          push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
                       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
          return true;
        }
    }

  /* We must re-recognize what we created before.  */
  if (GET_CODE (*p) == PLUS
      && (mode_sz == 4 || mode_sz == 8)
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
      && CONST_INT_P (XEXP (*p, 1))
      && ! (TARGET_SH2E && mode == SFmode))
    {
      /* Because this address is so complex, we know it must have
         been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
         it is already unshared, and needs no further unsharing.  */
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
      return true;
    }

  return false;
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */
static rtx
sh_delegitimize_address (rtx orig_x)
{
  rtx x, y;

  orig_x = delegitimize_mem_from_attrs (orig_x);

  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) == CONST)
    {
      y = XEXP (x, 0);
      if (GET_CODE (y) == UNSPEC)
        {
          if (XINT (y, 1) == UNSPEC_GOT
              || XINT (y, 1) == UNSPEC_GOTOFF
              || XINT (y, 1) == UNSPEC_SYMOFF)
            return XVECEXP (y, 0, 0);
          else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
            {
              if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
                {
                  rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);

                  if (GET_CODE (symplt) == UNSPEC
                      && XINT (symplt, 1) == UNSPEC_PLT)
                    return XVECEXP (symplt, 0, 0);
                }
            }
          else if (TARGET_SHMEDIA
                   && (XINT (y, 1) == UNSPEC_EXTRACT_S16
                       || XINT (y, 1) == UNSPEC_EXTRACT_U16))
            {
              rtx offset = XVECEXP (y, 0, 1);

              x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
              if (MEM_P (orig_x))
                x = replace_equiv_address_nv (orig_x, x);
              return x;
            }
        }
    }

  return orig_x;
}

/* Mark the use of a constant in the literal table.  If the constant
   has multiple labels, make it unique.  */
static rtx
mark_constant_pool_use (rtx x)
{
  rtx_insn *insn, *lab;
  rtx pattern;

  if (x == NULL_RTX)
    return x;

  switch (GET_CODE (x))
    {
    case LABEL_REF:
      x = XEXP (x, 0);
    case CODE_LABEL:
      break;
    default:
      return x;
    }

  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
  lab = as_a <rtx_insn *> (x);
  for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
    {
      if (!LABEL_P (insn)
          || LABEL_REFS (insn) != NEXT_INSN (insn))
        break;
      lab = insn;
    }

  for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
    as_a<rtx_insn *> (insn)->set_deleted ();

  /* Mark constants in a window.
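     That is, walk forward from the label, setting the 'used' operand of
     every UNSPECV_CONST2/4/8 entry encountered, until the matching
     window-end or const-end marker terminates the scan.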
*/
  for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
    {
      if (!NONJUMP_INSN_P (insn))
        continue;

      pattern = PATTERN (insn);
      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
        continue;

      switch (XINT (pattern, 1))
        {
        case UNSPECV_CONST2:
        case UNSPECV_CONST4:
        case UNSPECV_CONST8:
          XVECEXP (pattern, 0, 1) = const1_rtx;
          break;
        case UNSPECV_WINDOW_END:
          if (XVECEXP (pattern, 0, 0) == x)
            return lab;
          break;
        case UNSPECV_CONST_END:
          return lab;
        default:
          break;
        }
    }

  return lab;
}

/* Return true if it's possible to redirect BRANCH1 to the destination
   of an unconditional jump BRANCH2.  We only want to do this if the
   resulting branch will have a short displacement.  */
static bool
sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
{
  /* Don't follow if BRANCH2 is possible to be a jump crossing between
     hot and cold partitions.  */
  if (TARGET_SH1
      && flag_reorder_blocks_and_partition
      && simplejump_p (branch2)
      && CROSSING_JUMP_P (branch2))
    return false;

  if (flag_expensive_optimizations && simplejump_p (branch2))
    {
      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
      rtx_insn *insn;
      int distance;

      for (distance = 0, insn = NEXT_INSN (branch1);
           insn && distance < 256;
           insn = PREV_INSN (insn))
        {
          if (insn == dest)
            return true;
          else
            distance += get_attr_length (insn);
        }
      for (distance = 0, insn = NEXT_INSN (branch1);
           insn && distance < 256;
           insn = NEXT_INSN (insn))
        {
          if (insn == dest)
            return true;
          else
            distance += get_attr_length (insn);
        }
    }
  return false;
}

/* Return nonzero if register old_reg can be renamed to register new_reg.  */
bool
sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
                         unsigned int new_reg)
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */
  if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
    return false;

  return true;
}

/* Function to update the integer COST
   based on the relationship between INSN that is dependent on
   DEP_INSN through the dependence LINK.  The default is to make no
   adjustment to COST.  This can be used for example to specify to
   the scheduler that an output- or anti-dependence does not incur
   the same cost as a data-dependence.  The return value should be
   the new value for COST.  */
static int
sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
                rtx_insn *dep_insn, int cost)
{
  rtx reg, use_pat;

  if (TARGET_SHMEDIA)
    {
      /* On SHmedia, if the dependence is an anti-dependence or
         output-dependence, there is no cost.  */
      if (REG_NOTE_KIND (link) != 0)
        {
          /* However, dependencies between target register loads and
             uses of the register in a subsequent block that are separated
             by a conditional branch are not modelled - we have to make do
             with the anti-dependency between the target register load and
             the conditional branch that ends the current block.
*/ 10908 if (REG_NOTE_KIND (link) == REG_DEP_ANTI 10909 && GET_CODE (PATTERN (dep_insn)) == SET 10910 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA 10911 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA) 10912 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA) 10913 { 10914 int orig_cost = cost; 10915 rtx note = find_reg_note (insn, REG_BR_PROB, 0); 10916 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE) 10917 ? insn : JUMP_LABEL (insn)); 10918 /* On the likely path, the branch costs 1, on the unlikely path, 10919 it costs 3. */ 10920 cost--; 10921 do 10922 target = next_active_insn (target); 10923 while (target && ! flow_dependent_p (target, dep_insn) 10924 && --cost > 0); 10925 /* If two branches are executed in immediate succession, with the 10926 first branch properly predicted, this causes a stall at the 10927 second branch, hence we won't need the target for the 10928 second branch for two cycles after the launch of the first 10929 branch. */ 10930 if (cost > orig_cost - 2) 10931 cost = orig_cost - 2; 10932 } 10933 else 10934 cost = 0; 10935 } 10936 10937 else if (get_attr_is_mac_media (insn) 10938 && get_attr_is_mac_media (dep_insn)) 10939 cost = 1; 10940 10941 else if (! reload_completed 10942 && GET_CODE (PATTERN (insn)) == SET 10943 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT 10944 && GET_CODE (PATTERN (dep_insn)) == SET 10945 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode) 10946 && cost < 4) 10947 cost = 4; 10948 /* Schedule the ptabs for a casesi_jump_media in preference to stuff 10949 that is needed at the target. */ 10950 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA 10951 && ! flow_dependent_p (insn, dep_insn)) 10952 cost--; 10953 } 10954 else if (REG_NOTE_KIND (link) == 0) 10955 { 10956 enum attr_type type; 10957 rtx dep_set; 10958 10959 if (recog_memoized (insn) < 0 10960 || recog_memoized (dep_insn) < 0) 10961 return cost; 10962 10963 dep_set = single_set (dep_insn); 10964 10965 /* The latency that we specify in the scheduling description refers 10966 to the actual output, not to an auto-increment register; for that, 10967 the latency is one. */ 10968 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) 10969 { 10970 rtx set = single_set (insn); 10971 10972 if (set 10973 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) 10974 && (!MEM_P (SET_DEST (set)) 10975 || !reg_mentioned_p (SET_DEST (dep_set), 10976 XEXP (SET_DEST (set), 0)))) 10977 cost = 1; 10978 } 10979 /* The only input for a call that is timing-critical is the 10980 function's address. */ 10981 if (CALL_P (insn)) 10982 { 10983 rtx call = get_call_rtx_from (insn); 10984 if (call 10985 /* sibcalli_thunk uses a symbol_ref in an unspec. */ 10986 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC 10987 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))) 10988 cost -= TARGET_SH4_300 ? 3 : 6; 10989 } 10990 /* Likewise, the most timing critical input for an sfuncs call 10991 is the function address. However, sfuncs typically start 10992 using their arguments pretty quickly. 10993 Assume a four cycle delay for SH4 before they are needed. 10994 Cached ST40-300 calls are quicker, so assume only a one 10995 cycle delay there. 10996 ??? Maybe we should encode the delays till input registers 10997 are needed by sfuncs into the sfunc call insn. */ 10998 /* All sfunc calls are parallels with at least four components. 10999 Exploit this to avoid unnecessary calls to sfunc_uses_reg. 
*/ 11000 else if (GET_CODE (PATTERN (insn)) == PARALLEL 11001 && XVECLEN (PATTERN (insn), 0) >= 4 11002 && (reg = sfunc_uses_reg (insn))) 11003 { 11004 if (! reg_set_p (reg, dep_insn)) 11005 cost -= TARGET_SH4_300 ? 1 : 4; 11006 } 11007 if (TARGET_HARD_SH4 && !TARGET_SH4_300) 11008 { 11009 enum attr_type dep_type = get_attr_type (dep_insn); 11010 11011 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) 11012 cost--; 11013 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) 11014 && (type = get_attr_type (insn)) != TYPE_CALL 11015 && type != TYPE_SFUNC) 11016 cost--; 11017 /* When the preceding instruction loads the shift amount of 11018 the following SHAD/SHLD, the latency of the load is increased 11019 by 1 cycle. */ 11020 if (get_attr_type (insn) == TYPE_DYN_SHIFT 11021 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES 11022 && reg_overlap_mentioned_p (SET_DEST (dep_set), 11023 XEXP (SET_SRC (single_set (insn)), 11024 1))) 11025 cost++; 11026 /* When an LS group instruction with a latency of less than 11027 3 cycles is followed by a double-precision floating-point 11028 instruction, FIPR, or FTRV, the latency of the first 11029 instruction is increased to 3 cycles. */ 11030 else if (cost < 3 11031 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP 11032 && get_attr_dfp_comp (insn) == DFP_COMP_YES) 11033 cost = 3; 11034 /* The lsw register of a double-precision computation is ready one 11035 cycle earlier. */ 11036 else if (reload_completed 11037 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES 11038 && (use_pat = single_set (insn)) 11039 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), 11040 SET_SRC (use_pat))) 11041 cost -= 1; 11042 11043 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES 11044 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) 11045 cost -= 1; 11046 } 11047 else if (TARGET_SH4_300) 11048 { 11049 /* Stores need their input register two cycles later. */ 11050 if (dep_set && cost >= 1 11051 && ((type = get_attr_type (insn)) == TYPE_STORE 11052 || type == TYPE_PSTORE 11053 || type == TYPE_FSTORE || type == TYPE_MAC_MEM)) 11054 { 11055 rtx set = single_set (insn); 11056 11057 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0)) 11058 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set))) 11059 { 11060 cost -= 2; 11061 /* But don't reduce the cost below 1 if the address depends 11062 on a side effect of dep_insn. */ 11063 if (cost < 1 11064 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn)) 11065 cost = 1; 11066 } 11067 } 11068 } 11069 } 11070 /* An anti-dependence penalty of two applies if the first insn is a double 11071 precision fadd / fsub / fmul. */ 11072 else if (!TARGET_SH4_300 11073 && REG_NOTE_KIND (link) == REG_DEP_ANTI 11074 && recog_memoized (dep_insn) >= 0 11075 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH 11076 || get_attr_type (dep_insn) == TYPE_DFP_MUL) 11077 /* A lot of alleged anti-flow dependences are fake, 11078 so check this one is real. */ 11079 && flow_dependent_p (dep_insn, insn)) 11080 cost = 2; 11081 11082 return cost; 11083} 11084 11085/* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check 11086 if DEP_INSN is anti-flow dependent on INSN. */ 11087static bool 11088flow_dependent_p (rtx insn, rtx dep_insn) 11089{ 11090 rtx tmp = PATTERN (insn); 11091 11092 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp); 11093 return tmp == NULL_RTX; 11094} 11095 11096/* A helper function for flow_dependent_p called through note_stores. 
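   DATA points at the pattern of the insn whose dependence is being
   tested; it is reset to NULL_RTX as soon as a location stored by
   DEP_INSN is referenced by that pattern, which flow_dependent_p above
   then reports as a dependence.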
*/ 11097static void 11098flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) 11099{ 11100 rtx * pinsn = (rtx *) data; 11101 11102 if (*pinsn && reg_referenced_p (x, *pinsn)) 11103 *pinsn = NULL_RTX; 11104} 11105 11106/* For use by sh_allocate_initial_value. Note that sh.md contains some 11107 'special function' patterns (type sfunc) that clobber pr, but that 11108 do not look like function calls to leaf_function_p. Hence we must 11109 do this extra check. */ 11110static int 11111sh_pr_n_sets (void) 11112{ 11113 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG); 11114} 11115 11116/* Return where to allocate pseudo for a given hard register initial 11117 value. */ 11118static rtx 11119sh_allocate_initial_value (rtx hard_reg) 11120{ 11121 rtx x; 11122 11123 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)) 11124 { 11125 if (crtl->is_leaf 11126 && ! sh_pr_n_sets () 11127 && ! (TARGET_SHCOMPACT 11128 && ((crtl->args.info.call_cookie 11129 & ~ CALL_COOKIE_RET_TRAMP (1)) 11130 || crtl->saves_all_registers))) 11131 x = hard_reg; 11132 else 11133 x = gen_frame_mem (Pmode, return_address_pointer_rtx); 11134 } 11135 else 11136 x = NULL_RTX; 11137 11138 return x; 11139} 11140 11141/* This function returns "2" to indicate dual issue for the SH4 11142 processor. To be used by the DFA pipeline description. */ 11143static int 11144sh_issue_rate (void) 11145{ 11146 if (TARGET_SUPERSCALAR) 11147 return 2; 11148 else 11149 return 1; 11150} 11151 11152/* Functions for ready queue reordering for sched1. */ 11153 11154/* Get weight for mode for a set x. */ 11155static short 11156find_set_regmode_weight (rtx x, machine_mode mode) 11157{ 11158 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode)) 11159 return 1; 11160 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode)) 11161 { 11162 if (REG_P (SET_DEST (x))) 11163 { 11164 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x))) 11165 return 1; 11166 else 11167 return 0; 11168 } 11169 return 1; 11170 } 11171 return 0; 11172} 11173 11174/* Get regmode weight for insn. */ 11175static short 11176find_insn_regmode_weight (rtx insn, machine_mode mode) 11177{ 11178 short reg_weight = 0; 11179 rtx x; 11180 11181 /* Increment weight for each register born here. */ 11182 x = PATTERN (insn); 11183 reg_weight += find_set_regmode_weight (x, mode); 11184 if (GET_CODE (x) == PARALLEL) 11185 { 11186 int j; 11187 for (j = XVECLEN (x, 0) - 1; j >= 0; j--) 11188 { 11189 x = XVECEXP (PATTERN (insn), 0, j); 11190 reg_weight += find_set_regmode_weight (x, mode); 11191 } 11192 } 11193 /* Decrement weight for each register that dies here. */ 11194 for (x = REG_NOTES (insn); x; x = XEXP (x, 1)) 11195 { 11196 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED) 11197 { 11198 rtx note = XEXP (x, 0); 11199 if (REG_P (note) && GET_MODE (note) == mode) 11200 reg_weight--; 11201 } 11202 } 11203 return reg_weight; 11204} 11205 11206/* Calculate regmode weights for all insns of a basic block. */ 11207static void 11208find_regmode_weight (basic_block b, machine_mode mode) 11209{ 11210 rtx_insn *insn, *next_tail, *head, *tail; 11211 11212 get_ebb_head_tail (b, b, &head, &tail); 11213 next_tail = NEXT_INSN (tail); 11214 11215 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 11216 { 11217 /* Handle register life information. 
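	 A DFmode (resp. DImode) value occupies two SFmode (resp.
	 SImode) registers, which is why the DFmode / DImode weight
	 below is counted twice.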
	 */
      if (!INSN_P (insn))
	continue;

      if (mode == SFmode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DFmode);
      else if (mode == SImode)
	INSN_REGMODE_WEIGHT (insn, mode) =
	  find_insn_regmode_weight (insn, mode)
	  + 2 * find_insn_regmode_weight (insn, DImode);
    }
}

/* Comparison function for ready queue sorting.  */
static int
rank_for_reorder (const void *x, const void *y)
{
  rtx_insn *tmp = *(rtx_insn * const *) y;
  rtx_insn *tmp2 = *(rtx_insn * const *) x;

  /* The insn in a schedule group should be issued first.  */
  if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
    return SCHED_GROUP_P (tmp2) ? 1 : -1;

  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect
     on register pressure.  */
  return INSN_LUID (tmp) - INSN_LUID (tmp2);
}

/* Resort the array A in which only the element at index N may be out
   of order.  */
static void
swap_reorder (rtx_insn **a, int n)
{
  rtx_insn *insn = a[n - 1];
  int i = n - 2;

  while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
    {
      a[i + 1] = a[i];
      i -= 1;
    }
  a[i + 1] = insn;
}

/* Sort the ready list by ascending priority.  */
static void
ready_reorder (rtx_insn **ready, int nready)
{
  if (nready == 2)
    swap_reorder (ready, nready);
  else if (nready > 2)
    qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
}

/* Count life regions of r0 for a block.  */
static int
find_r0_life_regions (basic_block b)
{
  rtx_insn *end, *insn;
  rtx pset;
  rtx r0_reg;
  int live;
  int set;
  int death = 0;

  if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
    {
      set = 1;
      live = 1;
    }
  else
    {
      set = 0;
      live = 0;
    }

  insn = BB_HEAD (b);
  end = BB_END (b);
  r0_reg = gen_rtx_REG (SImode, R0_REG);
  while (1)
    {
      if (INSN_P (insn))
	{
	  if (find_regno_note (insn, REG_DEAD, R0_REG))
	    {
	      death++;
	      live = 0;
	    }
	  if (!live
	      && (pset = single_set (insn))
	      && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
	      && !find_regno_note (insn, REG_UNUSED, R0_REG))
	    {
	      set++;
	      live = 1;
	    }
	}
      if (insn == end)
	break;
      insn = NEXT_INSN (insn);
    }
  return set - death;
}

/* Calculate regmode weights for all insns of all basic blocks.  */
static void
sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
		   int verbose ATTRIBUTE_UNUSED,
		   int old_max_uid)
{
  basic_block b;

  regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
  regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
  r0_life_regions = 0;

  FOR_EACH_BB_REVERSE_FN (b, cfun)
    {
      find_regmode_weight (b, SImode);
      find_regmode_weight (b, SFmode);
      if (!reload_completed)
	r0_life_regions += find_r0_life_regions (b);
    }

  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Cleanup.
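   Free the regmode weight arrays allocated in sh_md_init_global.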
*/
static void
sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
		     int verbose ATTRIBUTE_UNUSED)
{
  if (regmode_weight[0])
    {
      free (regmode_weight[0]);
      regmode_weight[0] = NULL;
    }
  if (regmode_weight[1])
    {
      free (regmode_weight[1]);
      regmode_weight[1] = NULL;
    }
}

/* The set of scalar modes supported differs from the default only in
   TImode for 32-bit SHMEDIA.  */
static bool
sh_scalar_mode_supported_p (machine_mode mode)
{
  if (TARGET_SHMEDIA32 && mode == TImode)
    return false;

  return default_scalar_mode_supported_p (mode);
}

/* Cache can_issue_more so that we can return it from sh_reorder2.  Also,
   keep count of register pressure on SImode and SFmode.  */
static int
sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   rtx_insn *insn,
		   int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    cached_can_issue_more = can_issue_more - 1;
  else
    cached_can_issue_more = can_issue_more;

  if (reload_completed)
    return cached_can_issue_more;

  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);

  return cached_can_issue_more;
}

static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}

/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
#define R0_MAX_LIFE_REGIONS 2
/* Register pressure thresholds for SImode and SFmode registers.  */
#define SIMODE_MAX_WEIGHT 5
#define SFMODE_MAX_WEIGHT 10

/* Return true if the pressure is high for MODE.  */
static bool
high_pressure (machine_mode mode)
{
  /* Pressure on register r0 can lead to spill failures, so avoid sched1
     for functions that already have high pressure on r0.  */
  if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
    return true;

  if (mode == SFmode)
    return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
  else
    return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
}

/* Reorder the ready queue if register pressure is high.  */
static int
sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
	    int sched_verbose ATTRIBUTE_UNUSED,
	    rtx_insn **ready,
	    int *n_readyp,
	    int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return sh_issue_rate ();

  if (high_pressure (SFmode) || high_pressure (SImode))
    {
      ready_reorder (ready, *n_readyp);
    }

  return sh_issue_rate ();
}

/* Skip cycles if the current register pressure is high.
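   sh_reorder2 itself only sets skip_cycles; the actual stalling is done
   by sh_dfa_new_cycle below, which keeps insns queued for up to
   MAX_SKIPS cycles once skip_cycles has been set here.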
*/
static int
sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
	     int sched_verbose ATTRIBUTE_UNUSED,
	     rtx_insn **ready ATTRIBUTE_UNUSED,
	     int *n_readyp ATTRIBUTE_UNUSED,
	     int clock_var ATTRIBUTE_UNUSED)
{
  if (reload_completed)
    return cached_can_issue_more;

  if (high_pressure (SFmode) || high_pressure (SImode))
    skip_cycles = 1;

  return cached_can_issue_more;
}

/* Skip cycles without sorting the ready queue.  This will move insns
   from Q -> R.  If this is the last cycle we are skipping, allow
   sorting of the ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to
   move from Q -> R.  */
#define MAX_SKIPS 8

static int
sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  rtx_insn *insn ATTRIBUTE_UNUSED,
		  int last_clock_var,
		  int clock_var,
		  int *sort_p)
{
  if (reload_completed)
    return 0;

  if (skip_cycles)
    {
      if ((clock_var - last_clock_var) < MAX_SKIPS)
	{
	  *sort_p = 0;
	  return 1;
	}
      /* If this is the last cycle we are skipping, allow reordering of R.  */
      if ((clock_var - last_clock_var) == MAX_SKIPS)
	{
	  *sort_p = 1;
	  return 1;
	}
    }

  skip_cycles = 0;

  return 0;
}

/* SHmedia requires registers for branches, so we can't generate new
   branches past reload.  */
static bool
sh_cannot_modify_jumps_p (void)
{
  return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
}

static reg_class_t
sh_target_reg_class (void)
{
  return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
}

static bool
sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
{
  if (! shmedia_space_reserved_for_target_registers)
    return 0;
  if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
    return 0;

  HARD_REG_SET dummy;
  if (calc_live_regs (&dummy) >= 6 * 8)
    return 1;
  return 0;
}

static bool
sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
{
  return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
}

/*
   On the SH1..SH4, the trampoline looks like
   2 0002 D202		mov.l	l2,r2
   1 0000 D301		mov.l	l1,r3
   3 0004 422B		jmp	@r2
   4 0006 0009		nop
   5 0008 00000000	l1:	.long	area
   6 000c 00000000	l2:	.long	function

   SH5 (compact) uses r1 instead of r3 for the static chain.  */


/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
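   TRAMP_MEM is the memory block reserved for the trampoline; its
   address is forced into a register below so that it can also be fed
   to the instruction cache invalidation sequence emitted at the end.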
*/ 11555static void 11556sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) 11557{ 11558 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 11559 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); 11560 11561 if (TARGET_SHMEDIA64) 11562 { 11563 rtx tramp_templ; 11564 int fixed_len; 11565 11566 rtx movi1 = GEN_INT (0xcc000010); 11567 rtx shori1 = GEN_INT (0xc8000010); 11568 rtx src, dst; 11569 11570 /* The following trampoline works within a +- 128 KB range for cxt: 11571 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0; 11572 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0 11573 gettr tr1,r1; blink tr0,r63 */ 11574 /* Address rounding makes it hard to compute the exact bounds of the 11575 offset for this trampoline, but we have a rather generous offset 11576 range, so frame_offset should do fine as an upper bound. */ 11577 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000) 11578 { 11579 /* ??? could optimize this trampoline initialization 11580 by writing DImode words with two insns each. */ 11581 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00)); 11582 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp); 11583 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2)); 11584 insn = gen_rtx_AND (DImode, insn, mask); 11585 /* Or in ptb/u .,tr1 pattern */ 11586 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode)); 11587 insn = force_operand (insn, NULL_RTX); 11588 insn = gen_lowpart (SImode, insn); 11589 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn); 11590 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38)); 11591 insn = gen_rtx_AND (DImode, insn, mask); 11592 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX); 11593 insn = gen_lowpart (SImode, insn); 11594 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn); 11595 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22)); 11596 insn = gen_rtx_AND (DImode, insn, mask); 11597 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 11598 insn = gen_lowpart (SImode, insn); 11599 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn); 11600 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6)); 11601 insn = gen_rtx_AND (DImode, insn, mask); 11602 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 11603 insn = gen_lowpart (SImode, insn); 11604 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn); 11605 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10)); 11606 insn = gen_rtx_AND (DImode, insn, mask); 11607 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX); 11608 insn = gen_lowpart (SImode, insn); 11609 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn); 11610 emit_move_insn (adjust_address (tramp_mem, SImode, 20), 11611 GEN_INT (0x6bf10600)); 11612 emit_move_insn (adjust_address (tramp_mem, SImode, 24), 11613 GEN_INT (0x4415fc10)); 11614 emit_move_insn (adjust_address (tramp_mem, SImode, 28), 11615 GEN_INT (0x4401fff0)); 11616 emit_insn (gen_ic_invalidate_line (tramp)); 11617 return; 11618 } 11619 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline"); 11620 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode); 11621 11622 tramp_templ = gen_datalabel_ref (tramp_templ); 11623 dst = tramp_mem; 11624 src = gen_const_mem (BLKmode, tramp_templ); 11625 set_mem_align (dst, 256); 11626 set_mem_align (src, 64); 11627 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL); 11628 11629 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr); 11630 emit_move_insn 
(adjust_address (tramp_mem, Pmode, 11631 fixed_len + GET_MODE_SIZE (Pmode)), 11632 cxt); 11633 emit_insn (gen_ic_invalidate_line (tramp)); 11634 return; 11635 } 11636 else if (TARGET_SHMEDIA) 11637 { 11638 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0 11639 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */ 11640 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode); 11641 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode); 11642 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated, 11643 rotated 10 right, and higher 16 bit of every 32 selected. */ 11644 rtx movishori 11645 = force_reg (V2HImode, (simplify_gen_subreg 11646 (V2HImode, GEN_INT (0x4330432), SImode, 0))); 11647 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600)); 11648 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0)); 11649 11650 fnaddr = force_reg (SImode, fnaddr); 11651 cxt = force_reg (SImode, cxt); 11652 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0), 11653 gen_rtx_SUBREG (V2HImode, fnaddr, 0), 11654 movishori)); 11655 emit_insn (gen_rotrdi3_mextr (quad0, quad0, 11656 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56))); 11657 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx)); 11658 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0); 11659 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0), 11660 gen_rtx_SUBREG (V2HImode, cxt, 0), 11661 movishori)); 11662 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload, 11663 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56))); 11664 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx)); 11665 if (TARGET_LITTLE_ENDIAN) 11666 { 11667 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload)); 11668 emit_insn (gen_mextr4 (quad2, cxtload, blink)); 11669 } 11670 else 11671 { 11672 emit_insn (gen_mextr4 (quad1, cxtload, ptabs)); 11673 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload)); 11674 } 11675 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1); 11676 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2); 11677 emit_insn (gen_ic_invalidate_line (tramp)); 11678 return; 11679 } 11680 else if (TARGET_SHCOMPACT) 11681 { 11682 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr)); 11683 return; 11684 } 11685 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), 11686 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301, 11687 SImode)); 11688 emit_move_insn (adjust_address (tramp_mem, SImode, 4), 11689 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009, 11690 SImode)); 11691 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt); 11692 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr); 11693 if (TARGET_HARD_SH4 || TARGET_SH5) 11694 { 11695 if (!TARGET_INLINE_IC_INVALIDATE 11696 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE)) 11697 emit_library_call (function_symbol (NULL, "__ic_invalidate", 11698 FUNCTION_ORDINARY), 11699 LCT_NORMAL, VOIDmode, 1, tramp, SImode); 11700 else 11701 emit_insn (gen_ic_invalidate_line (tramp)); 11702 } 11703} 11704 11705/* On SH5, trampolines are SHmedia code, so add 1 to the address. */ 11706static rtx 11707sh_trampoline_adjust_address (rtx tramp) 11708{ 11709 if (TARGET_SHMEDIA) 11710 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx, 11711 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN); 11712 return tramp; 11713} 11714 11715/* FIXME: This is overly conservative. 
A SHcompact function that
   receives arguments ``by reference'' will have them stored in its
   own stack frame, so it must not pass pointers or references to
   these arguments to other functions by means of sibling calls.  */
/* If PIC, we cannot make sibling calls to global functions
   because the PLT requires r12 to be live.  */
static bool
sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return (1
	  && (! TARGET_SHCOMPACT
	      || crtl->args.info.stack_regs == 0)
	  && ! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic
	      || (decl && ! TREE_PUBLIC (decl))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}

/* Machine specific built-in functions.  */

struct builtin_description
{
  bool (* const is_enabled) (void);
  const enum insn_code icode;
  const char *const name;
  int signature;
  tree fndecl;
};

static bool
shmedia_builtin_p (void)
{
  return TARGET_SHMEDIA;
}

/* Returns true for built-ins that are available on SH1 and later
   targets; used for the FPSCR built-ins below.  */
static bool
sh1_builtin_p (void)
{
  return TARGET_SH1;
}

/* Describes the number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
/* 9: 64-bit pointer, 10: 32-bit pointer.  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 10 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 1 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9 },
#define SH_BLTIN_LDUA_Q64 16
  { 1, 9 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 1 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8 },
#define SH_BLTIN_VP 24
  { 8, 0 },
#define SH_BLTIN_UV 25
  { 1, 0 },
#define SH_BLTIN_VU 26
  { 0, 1 },
};
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.
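   As a worked example of reading signature_args: SH_BLTIN_SH_HI is
   { 4, 4, 1 }, i.e. the result and the first argument take their types
   from the insn's operand modes with "don't care" signedness, while the
   shift / control argument is unsigned int, matching the notes about
   mperm and the shift built-ins above.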
*/ 11828static struct builtin_description bdesc[] = 11829{ 11830 { shmedia_builtin_p, 11831 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 }, 11832 { shmedia_builtin_p, 11833 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 }, 11834 { shmedia_builtin_p, 11835 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 }, 11836 { shmedia_builtin_p, 11837 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 }, 11838 { shmedia_builtin_p, 11839 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 }, 11840 { shmedia_builtin_p, 11841 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 }, 11842 { shmedia_builtin_p, 11843 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 }, 11844 { shmedia_builtin_p, 11845 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 }, 11846 { shmedia_builtin_p, 11847 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 }, 11848 { shmedia_builtin_p, 11849 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 }, 11850 { shmedia_builtin_p, 11851 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 }, 11852 { shmedia_builtin_p, 11853 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 }, 11854 { shmedia_builtin_p, 11855 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 }, 11856 { shmedia_builtin_p, 11857 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 }, 11858 { shmedia_builtin_p, 11859 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 }, 11860 { shmedia_builtin_p, 11861 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 }, 11862 { shmedia_builtin_p, 11863 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 }, 11864 { shmedia_builtin_p, 11865 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 }, 11866 { shmedia_builtin_p, 11867 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 }, 11868 { shmedia_builtin_p, 11869 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 }, 11870 { shmedia_builtin_p, 11871 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 }, 11872 { shmedia_builtin_p, 11873 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 }, 11874 { shmedia_builtin_p, 11875 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 }, 11876 { shmedia_builtin_p, 11877 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 }, 11878 { shmedia_builtin_p, 11879 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 }, 11880 { shmedia_builtin_p, 11881 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 }, 11882 { shmedia_builtin_p, 11883 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 }, 11884 { shmedia_builtin_p, 11885 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 }, 11886 { shmedia_builtin_p, 11887 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 }, 11888 { shmedia_builtin_p, 11889 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 }, 11890 { shmedia_builtin_p, 11891 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 }, 11892 { shmedia_builtin_p, 11893 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 }, 11894 { shmedia_builtin_p, 11895 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 }, 11896 { shmedia_builtin_p, 11897 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 }, 11898 { 
shmedia_builtin_p, 11899 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 }, 11900 { shmedia_builtin_p, 11901 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 }, 11902 { shmedia_builtin_p, 11903 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 }, 11904 { shmedia_builtin_p, 11905 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 }, 11906 { shmedia_builtin_p, 11907 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 }, 11908 { shmedia_builtin_p, 11909 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 }, 11910 { shmedia_builtin_p, 11911 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 }, 11912 { shmedia_builtin_p, 11913 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 }, 11914 { shmedia_builtin_p, 11915 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 }, 11916 { shmedia_builtin_p, 11917 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 }, 11918 { shmedia_builtin_p, 11919 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 }, 11920 { shmedia_builtin_p, 11921 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 }, 11922 { shmedia_builtin_p, 11923 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 }, 11924 { shmedia_builtin_p, 11925 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 }, 11926 { shmedia_builtin_p, 11927 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 }, 11928 { shmedia_builtin_p, 11929 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 }, 11930 { shmedia_builtin_p, 11931 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 }, 11932 { shmedia_builtin_p, 11933 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 }, 11934 { shmedia_builtin_p, 11935 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 }, 11936 { shmedia_builtin_p, 11937 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 }, 11938 { shmedia_builtin_p, 11939 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 }, 11940 { shmedia_builtin_p, 11941 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 }, 11942 { shmedia_builtin_p, 11943 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 }, 11944 { shmedia_builtin_p, 11945 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 }, 11946 { shmedia_builtin_p, 11947 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 }, 11948 { shmedia_builtin_p, 11949 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 }, 11950 { shmedia_builtin_p, 11951 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 }, 11952 { shmedia_builtin_p, 11953 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 }, 11954 { shmedia_builtin_p, 11955 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 }, 11956 { shmedia_builtin_p, 11957 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 }, 11958 { shmedia_builtin_p, 11959 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 }, 11960 { shmedia_builtin_p, 11961 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 }, 11962 { shmedia_builtin_p, 11963 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 }, 11964 { shmedia_builtin_p, 11965 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 }, 11966 { shmedia_builtin_p, 11967 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 }, 11968 { shmedia_builtin_p, 11969 CODE_FOR_sthi_q, 
"__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 }, 11970 { shmedia_builtin_p, 11971 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 }, 11972 { shmedia_builtin_p, 11973 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 }, 11974 { shmedia_builtin_p, 11975 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 }, 11976 { shmedia_builtin_p, 11977 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 }, 11978 { shmedia_builtin_p, 11979 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 }, 11980 { shmedia_builtin_p, 11981 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 }, 11982 { shmedia_builtin_p, 11983 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 }, 11984 { shmedia_builtin_p, 11985 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 }, 11986 { shmedia_builtin_p, 11987 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 }, 11988 { shmedia_builtin_p, 11989 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 }, 11990 { shmedia_builtin_p, 11991 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 }, 11992 { shmedia_builtin_p, 11993 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 }, 11994 { shmedia_builtin_p, 11995 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 }, 11996 11997 { sh1_builtin_p, 11998 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 }, 11999 { sh1_builtin_p, 12000 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 }, 12001}; 12002 12003static tree sh_builtin_get_fpscr; 12004static tree sh_builtin_set_fpscr; 12005 12006static void 12007sh_init_builtins (void) 12008{ 12009 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES]; 12010 memset (shared, 0, sizeof shared); 12011 12012 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di) 12013 { 12014 builtin_description* d = &bdesc[di]; 12015 12016 if (!d->is_enabled ()) 12017 continue; 12018 12019 tree type, arg_type = NULL_TREE; 12020 int signature = d->signature; 12021 12022 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature]) 12023 type = shared[signature]; 12024 else 12025 { 12026 int has_result = signature_args[signature][0] != 0; 12027 tree args[3]; 12028 12029 if ((signature_args[signature][1] & 8) 12030 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32) 12031 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64))) 12032 continue; 12033 if (! TARGET_FPU_ANY 12034 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode)) 12035 continue; 12036 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++) 12037 args[i] = NULL_TREE; 12038 for (int i = 3; ; i--) 12039 { 12040 int arg = signature_args[signature][i]; 12041 int opno = i - 1 + has_result; 12042 12043 if (arg & 8) 12044 arg_type = ptr_type_node; 12045 else if (arg) 12046 arg_type = (*lang_hooks.types.type_for_mode) 12047 (insn_data[d->icode].operand[opno].mode, (arg & 1)); 12048 else if (i) 12049 continue; 12050 else 12051 arg_type = void_type_node; 12052 if (i == 0) 12053 break; 12054 args[i-1] = arg_type; 12055 } 12056 type = build_function_type_list (arg_type, args[0], args[1], 12057 args[2], NULL_TREE); 12058 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES) 12059 shared[signature] = type; 12060 } 12061 d->fndecl = 12062 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD, 12063 NULL, NULL_TREE); 12064 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. 
*/ 12065 if (d->icode == CODE_FOR_sts_fpscr) 12066 sh_builtin_get_fpscr = d->fndecl; 12067 else if (d->icode == CODE_FOR_set_fpscr) 12068 sh_builtin_set_fpscr = d->fndecl; 12069 } 12070} 12071 12072/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ 12073 12074static void 12075sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 12076{ 12077 const unsigned SH_FE_INVALID = 64; 12078 const unsigned SH_FE_DIVBYZERO = 32; 12079 const unsigned SH_FE_OVERFLOW = 16; 12080 const unsigned SH_FE_UNDERFLOW = 8; 12081 const unsigned SH_FE_INEXACT = 4; 12082 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID 12083 | SH_FE_DIVBYZERO 12084 | SH_FE_OVERFLOW 12085 | SH_FE_UNDERFLOW 12086 | SH_FE_INEXACT); 12087 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5; 12088 tree fenv_var, mask, ld_fenv, masked_fenv; 12089 tree new_fenv_var, reload_fenv, restore_fnenv; 12090 tree update_call, atomic_feraiseexcept, hold_fnclex; 12091 12092 if (! TARGET_FPU_ANY) 12093 return; 12094 12095 /* Generate the equivalent of : 12096 unsigned int fenv_var; 12097 fenv_var = __builtin_sh_get_fpscr (); 12098 12099 unsigned int masked_fenv; 12100 masked_fenv = fenv_var & mask; 12101 12102 __builtin_sh_set_fpscr (masked_fenv); */ 12103 12104 fenv_var = create_tmp_var (unsigned_type_node); 12105 mask = build_int_cst (unsigned_type_node, 12106 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT) 12107 | SH_FE_ALL_EXCEPT)); 12108 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, 12109 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0)); 12110 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); 12111 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); 12112 *hold = build2 (COMPOUND_EXPR, void_type_node, 12113 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), 12114 hold_fnclex); 12115 12116 /* Store the value of masked_fenv to clear the exceptions: 12117 __builtin_sh_set_fpscr (masked_fenv); */ 12118 12119 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); 12120 12121 /* Generate the equivalent of : 12122 unsigned int new_fenv_var; 12123 new_fenv_var = __builtin_sh_get_fpscr (); 12124 12125 __builtin_sh_set_fpscr (fenv_var); 12126 12127 __atomic_feraiseexcept (new_fenv_var); */ 12128 12129 new_fenv_var = create_tmp_var (unsigned_type_node); 12130 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, 12131 build_call_expr (sh_builtin_get_fpscr, 0)); 12132 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var); 12133 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 12134 update_call = build_call_expr (atomic_feraiseexcept, 1, 12135 fold_convert (integer_type_node, 12136 new_fenv_var)); 12137 *update = build2 (COMPOUND_EXPR, void_type_node, 12138 build2 (COMPOUND_EXPR, void_type_node, 12139 reload_fenv, restore_fnenv), update_call); 12140} 12141 12142/* Implements target hook vector_mode_supported_p. */ 12143bool 12144sh_vector_mode_supported_p (machine_mode mode) 12145{ 12146 if (TARGET_FPU_ANY 12147 && ((mode == V2SFmode) 12148 || (mode == V4SFmode) 12149 || (mode == V16SFmode))) 12150 return true; 12151 12152 else if (TARGET_SHMEDIA 12153 && ((mode == V8QImode) 12154 || (mode == V2HImode) 12155 || (mode == V4HImode) 12156 || (mode == V2SImode))) 12157 return true; 12158 12159 return false; 12160} 12161 12162bool 12163sh_frame_pointer_required (void) 12164{ 12165/* If needed override this in other tm.h files to cope with various OS 12166 lossage requiring a frame pointer. 
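   A frame pointer is also forced when profiling (the crtl->profile
   check below); presumably the profiling run-time wants a fixed frame
   chain to walk.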
*/ 12167 if (SUBTARGET_FRAME_POINTER_REQUIRED) 12168 return true; 12169 12170 if (crtl->profile) 12171 return true; 12172 12173 return false; 12174} 12175 12176/* Implements target hook dwarf_calling_convention. Return an enum 12177 of dwarf_calling_convention. */ 12178int 12179sh_dwarf_calling_convention (const_tree func) 12180{ 12181 if (sh_attr_renesas_p (func)) 12182 return DW_CC_GNU_renesas_sh; 12183 12184 return DW_CC_normal; 12185} 12186 12187/* Returns the sh builtin decl for CODE. */ 12188static tree 12189sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 12190{ 12191 if (code >= ARRAY_SIZE (bdesc)) 12192 return error_mark_node; 12193 12194 if (!bdesc[code].is_enabled ()) 12195 return error_mark_node; 12196 12197 return bdesc[code].fndecl; 12198} 12199 12200/* Expand an expression EXP that calls a built-in function, 12201 with result going to TARGET if that's convenient 12202 (and in mode MODE if that's convenient). 12203 SUBTARGET may be used as the target for computing one of EXP's operands. 12204 IGNORE is nonzero if the value is to be ignored. */ 12205static rtx 12206sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 12207 machine_mode mode ATTRIBUTE_UNUSED, int ignore) 12208{ 12209 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 12210 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 12211 const struct builtin_description *d = &bdesc[fcode]; 12212 enum insn_code icode = d->icode; 12213 int signature = d->signature; 12214 int nop = 0; 12215 rtx op[4]; 12216 12217 if (signature_args[signature][0]) 12218 { 12219 if (ignore) 12220 return NULL_RTX; 12221 12222 machine_mode tmode = insn_data[icode].operand[0].mode; 12223 if (! target || GET_MODE (target) != tmode 12224 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 12225 target = gen_reg_rtx (tmode); 12226 op[nop++] = target; 12227 } 12228 else 12229 target = NULL_RTX; 12230 12231 for (int i = 1; i <= 3; i++, nop++) 12232 { 12233 tree arg; 12234 machine_mode opmode, argmode; 12235 tree optype; 12236 12237 if (! signature_args[signature][i]) 12238 break; 12239 arg = CALL_EXPR_ARG (exp, i - 1); 12240 if (arg == error_mark_node) 12241 return const0_rtx; 12242 if (signature_args[signature][i] & 8) 12243 { 12244 opmode = ptr_mode; 12245 optype = ptr_type_node; 12246 } 12247 else 12248 { 12249 opmode = insn_data[icode].operand[nop].mode; 12250 optype = (*lang_hooks.types.type_for_mode) (opmode, 0); 12251 } 12252 argmode = TYPE_MODE (TREE_TYPE (arg)); 12253 if (argmode != opmode) 12254 arg = build1 (NOP_EXPR, optype, arg); 12255 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL); 12256 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode)) 12257 op[nop] = copy_to_mode_reg (opmode, op[nop]); 12258 } 12259 12260 rtx pat = NULL_RTX; 12261 12262 switch (nop) 12263 { 12264 case 1: 12265 pat = (*insn_data[d->icode].genfun) (op[0]); 12266 break; 12267 case 2: 12268 pat = (*insn_data[d->icode].genfun) (op[0], op[1]); 12269 break; 12270 case 3: 12271 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]); 12272 break; 12273 case 4: 12274 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]); 12275 break; 12276 default: 12277 gcc_unreachable (); 12278 } 12279 if (! 
pat) 12280 return NULL_RTX; 12281 emit_insn (pat); 12282 return target; 12283} 12284 12285void 12286sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1) 12287{ 12288 rtx sel0 = const0_rtx; 12289 rtx sel1 = const1_rtx; 12290 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op; 12291 rtx op = gen_rtx_fmt_e (code, SFmode, op1); 12292 12293 emit_insn ((*fn) (op0, op1, op, sel0, sel0)); 12294 emit_insn ((*fn) (op0, op1, op, sel1, sel1)); 12295} 12296 12297void 12298sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2) 12299{ 12300 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2); 12301 12302 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op)); 12303 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op)); 12304} 12305 12306/* Return true if hard register REGNO can hold a value of machine-mode MODE. 12307 We can allow any mode in any general register. The special registers 12308 only allow SImode. Don't allow any mode in the PR. 12309 12310 We cannot hold DCmode values in the XD registers because alter_reg 12311 handles subregs of them incorrectly. We could work around this by 12312 spacing the XD registers like the DR registers, but this would require 12313 additional memory in every compilation to hold larger register vectors. 12314 We could hold SFmode / SCmode values in XD registers, but that 12315 would require a tertiary reload when reloading from / to memory, 12316 and a secondary reload to reload from / to general regs; that 12317 seems to be a losing proposition. 12318 12319 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode, 12320 it won't be ferried through GP registers first. */ 12321bool 12322sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 12323{ 12324 if (SPECIAL_REGISTER_P (regno)) 12325 return mode == SImode; 12326 12327 if (regno == FPUL_REG) 12328 return (mode == SImode || mode == SFmode); 12329 12330 if (FP_REGISTER_P (regno) && mode == SFmode) 12331 return true; 12332 12333 if (mode == V2SFmode) 12334 { 12335 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0) 12336 || GENERAL_REGISTER_P (regno))) 12337 return true; 12338 else 12339 return false; 12340 } 12341 12342 if (mode == V4SFmode) 12343 { 12344 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0) 12345 || GENERAL_REGISTER_P (regno)) 12346 return true; 12347 else 12348 return false; 12349 } 12350 12351 if (mode == V16SFmode) 12352 { 12353 if (TARGET_SHMEDIA) 12354 { 12355 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0) 12356 return true; 12357 else 12358 return false; 12359 } 12360 else 12361 return regno == FIRST_XD_REG; 12362 } 12363 12364 if (FP_REGISTER_P (regno)) 12365 { 12366 if (mode == SFmode 12367 || mode == SImode 12368 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode) 12369 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode) 12370 || mode == DCmode 12371 || (TARGET_SHMEDIA 12372 && (mode == DFmode || mode == DImode 12373 || mode == V2SFmode || mode == TImode))) 12374 && ((regno - FIRST_FP_REG) & 1) == 0) 12375 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode 12376 && ((regno - FIRST_FP_REG) & 3) == 0)) 12377 return true; 12378 else 12379 return false; 12380 } 12381 12382 if (XD_REGISTER_P (regno)) 12383 return mode == DFmode; 12384 12385 if (TARGET_REGISTER_P (regno)) 12386 return (mode == DImode || mode == SImode || mode == PDImode); 12387 12388 if (regno == PR_REG) 12389 return mode == SImode; 12390 12391 if (regno == FPSCR_REG) 12392 return mode == SImode; 12393 12394 /* FIXME. 
This works around PR target/37633 for -O0. */ 12395 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4) 12396 { 12397 unsigned int n = GET_MODE_SIZE (mode) / 8; 12398 12399 if (regno >= FIRST_GENERAL_REG + 10 - n + 1 12400 && regno <= FIRST_GENERAL_REG + 14) 12401 return false; 12402 } 12403 12404 return true; 12405} 12406 12407/* Specify the modes required to caller save a given hard regno. 12408 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK 12409 and returns ?Imode for float regs when sh_hard_regno_mode_ok 12410 permits integer modes on them. That makes LRA's split process 12411 unhappy. See PR55212. 12412 */ 12413machine_mode 12414sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, 12415 machine_mode mode) 12416{ 12417 if (FP_REGISTER_P (regno) 12418 && (mode == SFmode 12419 || mode == SCmode 12420 || ((mode == DFmode || mode == DCmode) 12421 && ((regno - FIRST_FP_REG) & 1) == 0))) 12422 return mode; 12423 12424 return choose_hard_reg_mode (regno, nregs, false); 12425} 12426 12427/* Return the class of registers for which a mode change from FROM to TO 12428 is invalid. */ 12429bool 12430sh_cannot_change_mode_class (machine_mode from, machine_mode to, 12431 enum reg_class rclass) 12432{ 12433 /* We want to enable the use of SUBREGs as a means to 12434 VEC_SELECT a single element of a vector. */ 12435 12436 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs. 12437 This can be problematic when SFmode vector subregs need to be accessed 12438 on the stack with displacement addressing, as it happens with -O0. 12439 Thus we disallow the mode change for -O0. */ 12440 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) 12441 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false; 12442 12443 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) 12444 { 12445 if (TARGET_LITTLE_ENDIAN) 12446 { 12447 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8) 12448 return reg_classes_intersect_p (DF_REGS, rclass); 12449 } 12450 else 12451 { 12452 if (GET_MODE_SIZE (from) < 8) 12453 return reg_classes_intersect_p (DF_REGS, rclass); 12454 } 12455 } 12456 return false; 12457} 12458 12459/* Return true if registers in machine mode MODE will likely be 12460 allocated to registers in small register classes. */ 12461bool 12462sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED) 12463{ 12464 return (! TARGET_SHMEDIA); 12465} 12466 12467/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times 12468 that label is used. */ 12469void 12470sh_mark_label (rtx address, int nuses) 12471{ 12472 if (GOTOFF_P (address)) 12473 { 12474 /* Extract the label or symbol. */ 12475 address = XEXP (address, 0); 12476 if (GET_CODE (address) == PLUS) 12477 address = XEXP (address, 0); 12478 address = XVECEXP (address, 0, 0); 12479 } 12480 if (GET_CODE (address) == LABEL_REF 12481 && LABEL_P (XEXP (address, 0))) 12482 LABEL_NUSES (XEXP (address, 0)) += nuses; 12483} 12484 12485/* Compute extra cost of moving data between one register class 12486 and another. 12487 12488 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass 12489 uses this information. Hence, the general register <-> floating point 12490 register information here is not used for SFmode. 
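   As a rough illustration of the formula below (not a statement about
   actual cycle counts): a DFmode move between a general and a floating
   point register costs (12 + 0) * ((8 + 7) / 8) = 12 without -mfmovd
   and 8 with it, while an SImode move of a pointer value (Pmode)
   between the same classes gets the additional +40 penalty mentioned
   for PR target/48596.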
*/ 12491static int 12492sh_register_move_cost (machine_mode mode, 12493 reg_class_t srcclass, reg_class_t dstclass) 12494{ 12495 if (dstclass == T_REGS || dstclass == PR_REGS) 12496 return 10; 12497 12498 if (dstclass == MAC_REGS && srcclass == MAC_REGS) 12499 return 4; 12500 12501 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD 12502 && REGCLASS_HAS_FP_REG (srcclass) 12503 && REGCLASS_HAS_FP_REG (dstclass)) 12504 return 4; 12505 12506 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS) 12507 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7); 12508 12509 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS) 12510 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass))) 12511 return 9; 12512 12513 if ((REGCLASS_HAS_FP_REG (dstclass) 12514 && REGCLASS_HAS_GENERAL_REG (srcclass)) 12515 || (REGCLASS_HAS_GENERAL_REG (dstclass) 12516 && REGCLASS_HAS_FP_REG (srcclass))) 12517 { 12518 /* Discourage trying to use fp regs for a pointer. This also 12519 discourages fp regs with SImode because Pmode is an alias 12520 of SImode on this target. See PR target/48596. */ 12521 int addend = (mode == Pmode) ? 40 : 0; 12522 12523 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend) 12524 * ((GET_MODE_SIZE (mode) + 7) / 8U)); 12525 } 12526 12527 if ((dstclass == FPUL_REGS 12528 && REGCLASS_HAS_GENERAL_REG (srcclass)) 12529 || (srcclass == FPUL_REGS 12530 && REGCLASS_HAS_GENERAL_REG (dstclass))) 12531 return 5; 12532 12533 if ((dstclass == FPUL_REGS 12534 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS)) 12535 || (srcclass == FPUL_REGS 12536 && (dstclass == PR_REGS || dstclass == MAC_REGS))) 12537 return 7; 12538 12539 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) 12540 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) 12541 return 20; 12542 12543 /* ??? ptabs faults on (value & 0x3) == 0x3 */ 12544 if (TARGET_SHMEDIA 12545 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS)) 12546 { 12547 if (sh_gettrcost >= 0) 12548 return sh_gettrcost; 12549 else if (!TARGET_PT_FIXED) 12550 return 100; 12551 } 12552 12553 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) 12554 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) 12555 return 4; 12556 12557 if (TARGET_SHMEDIA 12558 || (TARGET_FMOVD 12559 && ! REGCLASS_HAS_GENERAL_REG (srcclass) 12560 && ! REGCLASS_HAS_GENERAL_REG (dstclass))) 12561 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U); 12562 12563 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U); 12564} 12565 12566static rtx 12567emit_load_ptr (rtx reg, rtx addr) 12568{ 12569 rtx mem = gen_const_mem (ptr_mode, addr); 12570 12571 if (Pmode != ptr_mode) 12572 mem = gen_rtx_SIGN_EXTEND (Pmode, mem); 12573 return emit_move_insn (reg, mem); 12574} 12575 12576static void 12577sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 12578 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 12579 tree function) 12580{ 12581 CUMULATIVE_ARGS cum; 12582 int structure_value_byref = 0; 12583 rtx this_rtx, this_value, sibcall, funexp; 12584 rtx_insn *insns; 12585 tree funtype = TREE_TYPE (function); 12586 int simple_add = CONST_OK_FOR_ADD (delta); 12587 int did_load = 0; 12588 rtx scratch0, scratch1, scratch2; 12589 unsigned i; 12590 12591 reload_completed = 1; 12592 epilogue_completed = 1; 12593 crtl->uses_only_leaf_regs = 1; 12594 12595 emit_note (NOTE_INSN_PROLOGUE_END); 12596 12597 /* Find the "this" pointer. 
We have such a wide range of ABIs for the
   SH that it's best to do this completely machine independently.
   "this" is passed as first argument, unless a structure return pointer
   comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
    }
  this_rtx
    = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      if (call_used_regs[1] && ! fixed_regs[1])
	scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the
	 pointer to the area where struct values are returned.  */
      if (call_used_regs[3] && ! fixed_regs[3])
	scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
	if (i != REGNO (scratch0)
	    && call_used_regs[i] && ! fixed_regs[i]
	    && ! FUNCTION_ARG_REGNO_P (i))
	  {
	    scratch1 = gen_rtx_REG (ptr_mode, i);
	    break;
	  }
      if (scratch1 == scratch0)
	error ("need a second call-clobbered general purpose register");
      for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
	if (call_used_regs[i] && ! fixed_regs[i])
	  {
	    scratch2 = gen_rtx_REG (Pmode, i);
	    break;
	  }
      if (scratch2 == scratch0)
	error ("need a call-clobbered target register");
    }

  this_value = plus_constant (Pmode, this_rtx, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this_rtx, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this_rtx, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this_rtx);

      offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5 && scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.

rtx
function_symbol (rtx target, const char *name, enum sh_function_kind kind)
{
  rtx sym;

  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  /* ??? To allow cse to work, we use GOTOFF relocations.
	     We could add combiner patterns to transform this into
	     straight pc-relative calls with sym2PIC / bsrf when
	     label load and function call are still 1:1 and in the
	     same basic block during combine.  */
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  emit_insn (gen_symGOTOFF2reg (reg, sym));
	  sym = reg;
	  break;
	}
      }
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return target;
    }
  return sym;
}

/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}

rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((crtl->args.info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || crtl->saves_all_registers))
    return gen_frame_mem (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called after register
     allocation has started, unless it has been called before for the
     same register.  And even then, we end up in trouble if we didn't use
     the register in the same basic block before.  So call
     get_hard_reg_initial_val now and wrap it in an unspec if we might
     need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}

bool
sh_expand_t_scc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx target = operands[0];
  rtx op0 = operands[2];
  rtx op1 = operands[3];
  rtx result = target;
  HOST_WIDE_INT val;

  if (!REG_P (op0) || REGNO (op0) != T_REG
      || !CONST_INT_P (op1))
    return false;
  if (!REG_P (result))
    result = gen_reg_rtx (SImode);
  val = INTVAL (op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result, get_t_reg_rtx ()));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return false;
  if (result != target)
    emit_move_insn (target, result);
  return true;
}
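
/* Example of the mapping above (illustrative): "reg = (T == 1)" becomes a
   single movt insn, "reg = (T == 0)" a movnegt, and an EQ/NE comparison of
   T against any other constant has a compile-time known result, so it
   degenerates to loading the constant 0 or 1 into the target.  */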

/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
bool
check_use_sfunc_addr (rtx_insn *insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (LABEL_P (insn) || JUMP_P (insn))
	break;
      if (! INSN_P (insn))
	continue;

      if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
	insn = seq->insn (0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi.  */
static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
   maps to 0x10000.  */
static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}
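
/* Worked derivation of the two constants above (added for illustration):
     2**15 / pi = 32768 / 3.14159265... = 10430.378350470453
     pi / 2**15 = 3.14159265... / 32768 = 9.587379924285257e-5
   With these scale factors a full circle of 2*pi radians corresponds to
   the 16.16 fixed-point value 0x10000, which is the angle format that
   the fsca instruction consumes.  */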

/* Initialize the CUMULATIVE_ARGS structure.  */
void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
			 tree fntype,
			 rtx libname ATTRIBUTE_UNUSED,
			 tree fndecl,
			 signed int n_named_args,
			 machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
			 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = prototype_p (fntype);
      pcum->arg_count [(int) SH_ARG_INT]
	= TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
	= CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				 && pcum->arg_count [(int) SH_ARG_INT] == 0
				 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
				     ? int_size_in_bytes (TREE_TYPE (fntype))
				     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
				 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
				     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
	{
	  pcum->call_cookie =
	    CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
				   && GET_MODE_SIZE (mode) > 4
				   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

	  /* If the default ABI is the Renesas ABI then all library
	     calls must assume that the library will be using the
	     Renesas ABI.  So if the function would return its result
	     in memory then we must force the address of this memory
	     block onto the stack.  Ideally we would like to call
	     targetm.calls.return_in_memory() here but we do not have
	     the TYPE or the FNDECL available so we synthesize the
	     contents of that function as best we can.  */
	  pcum->force_mem =
	    (TARGET_DEFAULT & MASK_HITACHI)
	    && (mode == BLKmode
		|| (GET_MODE_SIZE (mode) > 4
		    && !(mode == DFmode
			 && TARGET_FPU_DOUBLE)));
	}
      else
	{
	  pcum->call_cookie = 0;
	  pcum->force_mem = FALSE;
	}
    }
}

rtx
sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}

/* Look through X cleaning up truncates of registers that span multiple
   actual hard registers.  Return the number of changes made.  */
int
shmedia_cleanup_truncate (rtx x)
{
  int n_changes = 0;
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == TRUNCATE)
	{
	  rtx reg = XEXP (x, 0);
	  machine_mode reg_mode = GET_MODE (reg);
	  if (REG_P (reg) && GET_MODE_SIZE (reg_mode) > 8)
	    {
	      int offset = subreg_lowpart_offset (DImode, reg_mode);
	      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode, offset);
	      n_changes += 1;
	      iter.skip_subrtxes ();
	    }
	}
    }
  return n_changes;
}

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.

   Return true iff INSN contains a MEM.  */
bool
sh_contains_memref_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (MEM_P (*iter))
      return true;
  return false;
}

/* Return true iff INSN loads a banked register.  */
bool
sh_loads_bankedreg_p (rtx insn)
{
  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx op = SET_DEST (PATTERN (insn));
      if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
	return true;
    }

  return false;
}

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
	{
	  rtx reg = gen_reg_rtx (Pmode);

	  /* We must not use GOTPLT for sibcalls, because PIC_REG
	     must be restored before the PLT code gets to run.  */
	  if (is_sibcall)
	    emit_insn (gen_symGOT2reg (reg, fnaddr));
	  else
	    emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
	  fnaddr = reg;
	}
      else
	{
	  fnaddr = gen_sym2PIC (fnaddr);
	  PUT_MODE (fnaddr, Pmode);
	}
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
static reg_class_t
sh_preferred_reload_class (rtx x, reg_class_t rclass)
{
  if (rclass == NO_REGS
      && TARGET_SHMEDIA
      && (CONST_DOUBLE_P (x)
	  || GET_CODE (x) == SYMBOL_REF
	  || PIC_ADDR_P (x)))
    return GENERAL_REGS;

  return rclass;
}

/* Implement TARGET_SECONDARY_RELOAD.  */
static reg_class_t
sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (REG_P (x) && REGNO (x) == GBR_REG)
    return NO_REGS;

  if (in_p)
    {
      if (REGCLASS_HAS_FP_REG (rclass)
	  && ! TARGET_SHMEDIA
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
	switch (mode)
	  {
	  case SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      if (rclass == FPUL_REGS
	  && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
			     || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      if (rclass == FPSCR_REGS
	  && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
      if (REGCLASS_HAS_FP_REG (rclass)
	  && TARGET_SHMEDIA
	  && immediate_operand (x, mode)
	  && x != CONST0_RTX (GET_MODE (x))
	  && GET_MODE (x) != V4SFmode)
	return GENERAL_REGS;
      if ((mode == QImode || mode == HImode)
	  && TARGET_SHMEDIA && inqhi_operand (x, mode))
	{
	  sri->icode = ((mode == QImode)
			? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
	  return NO_REGS;
	}
      if (TARGET_SHMEDIA && rclass == GENERAL_REGS
	  && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
	return TARGET_REGS;
    } /* end of input-only processing.  */

  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (REG_P (x)
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && REG_P (x)
	   && FP_REGISTER_P (REGNO (x))))
      && ! TARGET_SHMEDIA
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass)
	   && ! TARGET_SHMEDIA && mode == SImode))
      && (MEM_P (x)
	  || (REG_P (x)
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return NO_REGS;  // LRA wants NO_REGS here, it used to be FPUL_REGS;
    }
  if ((rclass == TARGET_REGS
       || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
      && !satisfies_constraint_Csy (x)
      && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
    return GENERAL_REGS;
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;
  if (rclass != GENERAL_REGS && REG_P (x)
      && TARGET_REGISTER_P (REGNO (x)))
    return GENERAL_REGS;

  /* If we get here, fall back to loading the FPUL register through general
     registers.  This case can happen when a movsi_ie insn is picked
     initially to load/store the FPUL register from/to another register,
     and then the other register is allocated on the stack.  */
  if (rclass == FPUL_REGS && true_regnum (x) == -1)
    return GENERAL_REGS;

  /* Force mov.b / mov.w displacement addressing insns to use R0 as
     the other operand.
     On SH2A we could also just leave it alone here, which would result in a
     4 byte move insn being generated instead.  However, for this to work
     the insns must have the appropriate alternatives.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && satisfies_constraint_Sdd (x)
      && sh_disp_addr_displacement (x)
	 <= sh_max_mov_insn_displacement (mode, false))
    return R0_REGS;

  /* When reload is trying to address a QImode or HImode subreg on the stack,
     force any subreg byte into R0_REGS, as this is going to become a
     displacement address.
     We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
     is on the stack, the memref to it might already require a displacement
     and that has to be added to the final address.  At this point we don't
     know the cumulative displacement so we assume the worst case.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
    return R0_REGS;

  return NO_REGS;
}

/* Return true if SUBST can't safely replace its equivalent during RA.  */
static bool
sh_cannot_substitute_mem_equiv_p (rtx)
{
  if (TARGET_SHMEDIA)
    return false;

  /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
     uses R0 and may cause a spill failure when R0 is already used.
     We have to return true for that case at least.
     Moreover, SH has a strong affinity to R0 and does not have enough
     hard registers to make the equiv substitution win in size and speed
     on average working sets.  The pseudos produced to hold the equiv
     values can't get good hard registers in the bad cases and end up as
     memory save/restore insns, which makes the code worse.  */
  return true;
}

/* Return true if DISP can be legitimized.  */
static bool
sh_legitimize_address_displacement (rtx *disp, rtx *offs,
				    machine_mode mode)
{
  if (TARGET_SHMEDIA)
    return false;

  if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
      || (TARGET_SH2E && mode == SFmode))
    return false;

  struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
  if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
    {
      *disp = adj.mov_disp;
      *offs = adj.offset_adjust;
      return true;
    }

  return false;
}
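
/* Illustrative example (added, with made-up numbers): a non-SH2A SImode
   mov.l insn can only encode displacements 0..60.  For an access at
   base + 100 the adjustment could come back as offset_adjust = 40 and
   mov_disp = 60, so that the address is rewritten as (base + 40) + 60
   and the remaining displacement fits the insn.  The exact split is
   whatever sh_find_mov_disp_adjust computes for the mode.  */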

/* Return true if a movsf insn should be split with an additional
   register.  */
bool
sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
{
  /* op0 == op1 */
  if (rtx_equal_p (op0, op1))
    return true;
  /* fy, FQ, reg */
  if (GET_CODE (op1) == CONST_DOUBLE
      && ! satisfies_constraint_G (op1)
      && ! satisfies_constraint_H (op1)
      && REG_P (op0)
      && REG_P (op2))
    return true;
  /* f, r, y */
  if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
      && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
      && REG_P (op2) && (REGNO (op2) == FPUL_REG))
    return true;
  /* r, f, y */
  if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
      && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
      && REG_P (op2) && (REGNO (op2) == FPUL_REG))
    return true;

  return false;
}

static void
sh_conditional_register_usage (void)
{
  int regno;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      fixed_regs[regno] = call_used_regs[regno] = 1;
  /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs.  */
  if (TARGET_SH5)
    {
      call_used_regs[FIRST_GENERAL_REG + 8]
	= call_used_regs[FIRST_GENERAL_REG + 9] = 1;
      call_really_used_regs[FIRST_GENERAL_REG + 8]
	= call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
    }
  if (TARGET_SHMEDIA)
    {
      regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
      CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
      regno_reg_class[FIRST_FP_REG] = FP_REGS;
    }
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* Renesas saves and restores mac registers on call.  */
  if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
    {
      call_really_used_regs[MACH_REG] = 0;
      call_really_used_regs[MACL_REG] = 0;
    }

  if (TARGET_SHMEDIA)
    {
      for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno++)
	if (! fixed_regs[regno] && call_really_used_regs[regno])
	  SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
    }
  else
    for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
      if (! fixed_regs[regno] && call_really_used_regs[regno])
	SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);

  call_really_used_regs[FPSCR_MODES_REG] = 0;
  call_really_used_regs[FPSCR_STAT_REG] = 0;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P

   can_store_by_pieces constructs VOIDmode CONST_DOUBLEs.  */
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (TARGET_SHMEDIA
	  ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	     || x == CONST0_RTX (mode)
	     || !TARGET_SHMEDIA_FPU
	     || TARGET_SHMEDIA64)
	  : (GET_CODE (x) != CONST_DOUBLE
	     || mode == DFmode || mode == SFmode
	     || mode == DImode || GET_MODE (x) == VOIDmode));
}

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

static void
sh_init_sync_libfuncs (void)
{
  init_sync_libfuncs (UNITS_PER_WORD);
}

/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  */
bool
sh_can_use_simple_return_p (void)
{
  HARD_REG_SET live_regs_mask;
  int d;

  /* Some targets require special return insns.  */
  if (TARGET_SHMEDIA
      || (TARGET_SHCOMPACT
	  && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
    return false;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Moving the prologue around doesn't reduce the size.  */
  if (optimize_function_for_size_p (cfun))
    return false;

  /* Finally, allow for the pr save.  */
  d = calc_live_regs (&live_regs_mask);

  if (rounded_frame_size (d) > 4)
    return false;

  return true;
}

/*------------------------------------------------------------------------------
  Address mode optimization support code
*/

typedef HOST_WIDE_INT disp_t;
static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
static const disp_t INVALID_DISP = MAX_DISP;

/* A memory reference which is described by a base register and a
   displacement.  */
class base_reg_disp
{
public:
  base_reg_disp (rtx br, disp_t d);

  bool is_reg (void) const;
  bool is_disp (void) const;
  rtx reg (void) const;
  disp_t disp (void) const;

private:
  rtx reg_;
  disp_t disp_;
};

inline
base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}

inline bool
base_reg_disp::is_reg (void) const
{
  return reg_ != NULL_RTX && disp_ != INVALID_DISP;
}

inline bool
base_reg_disp::is_disp (void) const
{
  return reg_ == NULL_RTX && disp_ != INVALID_DISP;
}

inline rtx
base_reg_disp::reg (void) const
{
  return reg_;
}

inline disp_t
base_reg_disp::disp (void) const
{
  return disp_;
}

/* Find the base register and calculate the displacement for a given
   address rtx 'x'.  */
static base_reg_disp
sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
		       rtx base_reg = NULL)
{
  if (REG_P (x))
    {
      if (REGNO (x) == GBR_REG)
	return base_reg_disp (x, disp);

      /* We've reached a hard-reg.  This is probably the point where
	 function args are copied to pseudos.  Do not go any further and
	 stick to the pseudo.  If the original mem addr was in a hard reg
	 from the beginning, it will become the base reg.  */
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	return base_reg_disp (base_reg != NULL ? base_reg : x, disp);

      /* Find the def of the reg and trace it.  If there is more than one
	 def and they are not all the same, assume it's not safe to
	 proceed.  */
      rtx_insn* last_i = NULL;
      rtx last_set = NULL;
      for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));

	  /* Accept multiple defs, as long as they are equal.  */
	  if (last_set == NULL || rtx_equal_p (last_set, set))
	    {
	      last_i = DF_REF_INSN (d);
	      last_set = set;
	    }
	  else
	    {
	      last_i = NULL;
	      last_set = NULL;
	      break;
	    }
	}

      if (last_set != NULL && last_i != NULL)
	return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
				      XEXP (last_set, 0));

      /* When we get here, no previous insn was found that sets the reg.
	 The input reg is already the base reg.  */
      return base_reg_disp (x, disp);
    }

  else if (GET_CODE (x) == PLUS)
    {
      base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
      base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));

      /* Either left or right val must be a reg.
	 We don't handle the case of 'reg + reg' here.  */
      if (left_val.is_reg () && right_val.is_disp ())
	return base_reg_disp (left_val.reg (), left_val.disp ()
					       + right_val.disp () + disp);
      else if (right_val.is_reg () && left_val.is_disp ())
	return base_reg_disp (right_val.reg (), right_val.disp ()
						+ left_val.disp () + disp);
      else
	return base_reg_disp (base_reg, disp);
    }

  else if (CONST_INT_P (x))
    return base_reg_disp (NULL, disp + INTVAL (x));

  /* Didn't find anything useful.  */
  return base_reg_disp (base_reg, disp);
}

/* Given an insn and a memory operand, try to find an equivalent GBR
   based memory address and return the corresponding new memory address.
   Return NULL_RTX if not found.  */
rtx
sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
{
  if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
    return NULL_RTX;

  /* Leave post/pre inc/dec or any other side effect addresses alone.  */
  if (side_effects_p (XEXP (mem, 0)))
    return NULL_RTX;

  /* When not optimizing there might be no dataflow available.  */
  if (df == NULL)
    return NULL_RTX;

  base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));

  if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
    {
      /* If GBR is marked as call clobbered we bail out if we see a call.
	 FIXME: Actually should check if this mem refers to the gbr value
	 before or after the call.  If there is a store_gbr preceding this
	 mem, it's safe to use GBR for this mem.

	 If GBR is not marked as call clobbered, but there is some other
	 def than a call, it's probably a load_gbr upon which we also
	 bail out to be on the safe side.
	 FIXME: Should check if we have a use-after-def case, such as
	 the call case above.  */
      for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  if (CALL_P (DF_REF_INSN (d)))
	    {
	      if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
		return NULL_RTX;
	      else
		continue;
	    }
	  else
	    return NULL_RTX;
	}

      rtx disp = GEN_INT (gbr_disp.disp ());
      if (gbr_displacement (disp, GET_MODE (mem)))
	return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
    }

  return NULL_RTX;
}
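
/* Example (illustrative, not from the original sources): for a sequence
   like
     r4 = GBR + 20
     ... = mem[r4 + 4]
   sh_find_base_reg_disp traces r4 back to GBR and yields (GBR, 24), so
   the access can be rewritten as @(24,GBR), provided the displacement
   passes the gbr_displacement check for the access size.  */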

/*------------------------------------------------------------------------------
  Manual insn combine support code.
*/

/* Return true if the specified insn contains any UNSPECs or
   UNSPEC_VOLATILEs.  */
static bool
sh_unspec_insn_p (rtx x)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (i, array, x, ALL)
    if (*i != NULL
	&& (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
      return true;

  return false;
}

/* Return true if the register operands of the specified insn are modified
   between the specified from and to insns (exclusive of those two).  */
bool
sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
				     const rtx_insn* from,
				     const rtx_insn* to)
{
  /* FIXME: Return true for multiple sets for now.  */
  rtx s = single_set (operands_insn);
  if (s == NULL_RTX)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
    if (*i != NULL
	&& ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
      return true;

  return false;
}

/* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
   negates the T bit and stores the result in the T bit.  */
bool
sh_is_nott_insn (const rtx_insn* i)
{
  return i != NULL && GET_CODE (PATTERN (i)) == SET
	 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
	 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
}

rtx
sh_movt_set_dest (const rtx_insn* i)
{
  if (i == NULL)
    return NULL;

  const_rtx p = PATTERN (i);
  return GET_CODE (p) == SET
	 && arith_reg_dest (XEXP (p, 0), SImode)
	 && t_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
}

/* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
   that stores the negated T bit in a register, and return the destination
   register rtx, or null.  */
rtx
sh_movrt_set_dest (const rtx_insn* i)
{
  if (i == NULL)
    return NULL;

  const_rtx p = PATTERN (i);

  /* The negc movrt replacement is inside a parallel.  */
  if (GET_CODE (p) == PARALLEL)
    p = XVECEXP (p, 0, 0);

  return GET_CODE (p) == SET
	 && arith_reg_dest (XEXP (p, 0), SImode)
	 && negt_reg_operand (XEXP (p, 1), VOIDmode) ? XEXP (p, 0) : NULL;
}

/* Given an insn and a reg number, tell whether the reg dies or is unused
   after the insn.  */
bool
sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
{
  return find_regno_note (i, REG_DEAD, regno) != NULL
	 || find_regno_note (i, REG_UNUSED, regno) != NULL;
}

/* Given an insn and a reg number, remove reg dead or reg unused notes to
   mark it as being used after the insn.  */
void
sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
{
  if (rtx n = find_regno_note (i, REG_DEAD, regno))
    remove_note (i, n);
  if (rtx n = find_regno_note (i, REG_UNUSED, regno))
    remove_note (i, n);
}

/* Given an insn check if it contains any post/pre inc/dec mem operands and
   add the REG_INC notes accordingly.
   FIXME: This function is very similar to lra.c (add_auto_inc_notes).
   FIXME: This function is currently used by peephole2 patterns because
	  the peephole2 pass does not preserve REG_INC notes.  If the notes
	  are dropped the following passes will do wrong things.  */
rtx_insn*
sh_check_add_incdec_notes (rtx_insn* i)
{
  struct for_each_inc_dec_clb
  {
    static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
		     rtx dest, rtx src ATTRIBUTE_UNUSED,
		     rtx srcoff ATTRIBUTE_UNUSED, void* arg)
    {
      gcc_assert (REG_P (dest));

      rtx_insn* i = (rtx_insn*)arg;
      if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
	add_reg_note (i, REG_INC, dest);

      return 0;
    }
  };

  for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
  return i;
}

/* Given a move insn destination and a source, make sure that the move
   source operand is not a post-inc mem load with the same address reg as
   the destination.  Returns the modified source operand with the post-inc
   removed if necessary.  */
rtx
sh_remove_overlapping_post_inc (rtx dst, rtx src)
{
  if (!MEM_P (src))
    return src;

  rtx addr = XEXP (src, 0);

  if (GET_CODE (addr) == POST_INC
      && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
    return replace_equiv_address (src, XEXP (addr, 0));

  gcc_assert (GET_CODE (addr) != POST_MODIFY);
  return src;
}

/* Emit a move insn that is safe to be used in peephole patterns.  */
rtx_insn*
sh_peephole_emit_move_insn (rtx dst, rtx src)
{
  return sh_check_add_incdec_notes (
	emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
}

/* Given an op rtx and an insn, try to find out whether the result of the
   specified op consists only of logical operations on T bit stores.  */
bool
sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
{
  if (!logical_operator (op, SImode))
    return false;

  rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
  int op_is_t_count = 0;

  for (int i = 0; i < 2; ++i)
    {
      if (t_reg_operand (ops[i], VOIDmode)
	  || negt_reg_operand (ops[i], VOIDmode))
	op_is_t_count++;

      else
	{
	  set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
						  prev_nonnote_insn_bb);
	  if (op_set.set_src == NULL_RTX)
	    continue;

	  if (t_reg_operand (op_set.set_src, VOIDmode)
	      || negt_reg_operand (op_set.set_src, VOIDmode)
	      || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
	    op_is_t_count++;
	}
    }

  return op_is_t_count == 2;
}

/* Given the operand that is extended in a sign/zero extend insn, and the
   insn, try to figure out whether the sign/zero extension can be replaced
   by a simple reg-reg copy.  If so, the replacement reg rtx is returned,
   NULL_RTX otherwise.  */
rtx
sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
{
  if (REG_P (extended_op))
    extended_op = extended_op;
  else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
    extended_op = SUBREG_REG (extended_op);
  else
    return NULL_RTX;

  /* Reg moves must be of the same mode.  */
  if (GET_MODE (extended_op) != SImode)
    return NULL_RTX;

  set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
  if (s.set_src == NULL_RTX)
    return NULL_RTX;

  if (t_reg_operand (s.set_src, VOIDmode)
      || negt_reg_operand (s.set_src, VOIDmode))
    return extended_op;

  /* If the zero extended reg was formed by a logical operation, check the
     operands of the logical operation.  If both originated from T bit
     stores the zero extension can be eliminated.  */
  else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
    return extended_op;

  return NULL_RTX;
}
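
/* Illustrative case for the function above (an added example): in a
   sequence such as
     movt    r2		! r2 = T, i.e. 0 or 1
     extu.b  r2,r3	! zero extension
   the extension is redundant, because a T bit store can never set any
   bits above bit 0, so the extu.b can be replaced by a plain reg-reg
   copy of r2.  */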

/* Given the current insn, which is assumed to be a movrt_negc insn, try to
   figure out whether it should be converted into a movt-xor sequence in
   the movrt_negc splitter.
   Returns true if insns have been modified and the splitter has succeeded.  */
bool
sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
{
  /* In cases such as
	tst	r4,r4
	mov	#-1,r1
	negc	r1,r1
	tst	r4,r4
     we can replace the T bit clobbering negc with a movt-xor sequence and
     eliminate the redundant comparison.
     Because the xor insn depends on register allocation results, allow this
     only before reload.  */
  if (!can_create_pseudo_p ())
    return false;

  set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
						 prev_nonnote_insn_bb);
  set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
						next_nonnote_insn_bb);

  if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
      && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
      && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_insn_operands_modified_between_p (t_before_negc.insn,
					       t_before_negc.insn,
					       t_after_negc.insn)
      && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_unspec_insn_p (t_after_negc.insn)
      && !volatile_insn_p (PATTERN (t_after_negc.insn))
      && !side_effects_p (PATTERN (t_after_negc.insn))
      && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
    {
      emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
      set_insn_deleted (t_after_negc.insn);
      return true;
    }
  else
    return false;
}

/* Given a reg and the current insn, see if the value of the reg originated
   from a sign or zero extension and return the discovered information.  */
sh_extending_set_of_reg
sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
{
  if (reg == NULL)
    return sh_extending_set_of_reg (curr_insn);

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return sh_extending_set_of_reg (curr_insn);

  /* FIXME: Also search the predecessor basic blocks.  It seems that checking
     only the adjacent predecessor blocks would cover most of the cases.
     Also try to look through the first extension that we hit.  There are
     some cases where a zero_extend is followed by an (implicit) sign_extend,
     and it fails to see the sign_extend.  */
  sh_extending_set_of_reg result =
	sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);

  if (result.set_src != NULL)
    {
      if (GET_CODE (result.set_src) == SIGN_EXTEND
	  || GET_CODE (result.set_src) == ZERO_EXTEND)
	{
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"explicitly sign/zero extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (XEXP (result.set_src, 0));
	  result.ext_code = GET_CODE (result.set_src);
	}
      else if (MEM_P (result.set_src)
	       && (GET_MODE (result.set_src) == QImode
		   || GET_MODE (result.set_src) == HImode)
	       && !sh_unspec_insn_p (result.insn))
	{
	  /* On SH, QImode and HImode memory loads always sign extend.
	     However, in some cases where it seems that the higher bits
	     are not interesting, the loads will not be expanded as sign
	     extending insns, but as QImode/HImode loads into QImode/HImode
	     regs.  We report that the reg has been sign extended by the
	     mem load.  When it is used as such, we must convert the mem
	     load into a sign extending insn, see also
	     sh_extending_set_of_reg::use_as_extended_reg.  */
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"implicitly sign extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (result.set_src);
	  result.ext_code = SIGN_EXTEND;
	}
    }

  return result;
}

/* Given a reg that is known to be sign or zero extended at some insn,
   take the appropriate measures so that the extended value can be used as
   a reg at the specified insn and return the resulting reg rtx.  */
rtx
sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
  gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
  gcc_assert (from_mode == QImode || from_mode == HImode);

  if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
    {
      if (dump_file)
	fprintf (dump_file,
		 "use_as_extended_reg: converting non-extending mem load in "
		 "insn %d into sign-extending load\n", INSN_UID (insn));

      rtx r = gen_reg_rtx (SImode);
      rtx_insn* i0;
      if (from_mode == QImode)
	i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
      else if (from_mode == HImode)
	i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
      else
	gcc_unreachable ();

      emit_insn_after (
	      gen_move_insn (XEXP (set_rtx, 0),
			     gen_lowpart (GET_MODE (set_src), r)), i0);
      set_insn_deleted (insn);
      return r;
    }
  else
    {
      rtx extension_dst = XEXP (set_rtx, 0);
      if (GET_MODE (extension_dst) != SImode)
	extension_dst = simplify_gen_subreg (SImode, extension_dst,
					     GET_MODE (extension_dst), 0);
      if (modified_between_p (extension_dst, insn, use_at_insn))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "use_as_extended_reg: dest reg %d of extending insn %d is "
		     "modified, inserting a reg-reg copy\n",
		     REGNO (extension_dst), INSN_UID (insn));

	  rtx r = gen_reg_rtx (SImode);
	  emit_insn_after (gen_move_insn (r, extension_dst), insn);
	  return r;
	}
      else
	{
	  sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
	  return extension_dst;
	}
    }
}
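
/* Example of the conversion above (illustrative asm, not from the original
   sources): a plain QImode load such as
     mov.b  @r4,r2		! described as a QImode reg in the RTL
   is rewritten into an explicit sign-extending SImode load of a fresh
   pseudo plus a copy that keeps the original QImode destination alive:
     mov.b  @r4,r3		! r3 = sign_extend (mem)
     mov    r3,r2		! preserve the old destination
   and the new pseudo r3 is returned to the caller.  */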

bool
sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
{
  if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
      && (from_mode == QImode || from_mode == HImode)
      && set_src != NULL)
    return arith_reg_operand (XEXP (set_src, 0), from_mode);
  else
    return false;
}

rtx
sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (can_use_as_unextended_reg ());

  rtx r = XEXP (set_src, 0);
  rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);

  if (modified_between_p (r, insn, use_at_insn))
    {
      rtx r1 = gen_reg_rtx (SImode);
      emit_insn_after (gen_move_insn (r1, r0), insn);
      return r1;
    }
  else
    {
      sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
					  ? REGNO (SUBREG_REG (r))
					  : REGNO (r));
      return r0;
    }
}

/* Given the current insn, which is assumed to be the *tst<mode>_t_subregs
   insn, perform the necessary checks on the operands and split it
   accordingly.  */
void
sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
		      int subreg_offset, rtx operands[])
{
  gcc_assert (subreg_mode == QImode || subreg_mode == HImode);

  sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
							       curr_insn);
  sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
							       curr_insn);

  /* If one of the operands is known to be zero extended, that's already
     sufficient to mask out the unwanted high bits.  */
  if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      operands[1]));
      return;
    }
  if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (operands[0],
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* None of the operands seem to be zero extended.
     If both are sign extended it's OK, too.  */
  if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
      && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* Otherwise we have to insert a zero extension on one of the operands to
     mask out the unwanted high bits.
     Prefer the operand that has no known extension.  */
  if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
    std::swap (operands[0], operands[1]);

  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
				  GET_MODE (operands[0]), subreg_offset);
  emit_insn (subreg_mode == QImode
	     ? gen_zero_extendqisi2 (tmp0, tmp1)
	     : gen_zero_extendhisi2 (tmp0, tmp1));
  emit_insn (gen_tstsi_t (tmp0, operands[1]));
}
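
/* Example split (illustrative): when testing QImode subregs and neither
   operand is known to be extended, the code above emits
     extu.b  r4,r1		! zero extend one operand into a new pseudo
     tst     r1,r5		! SImode test sets T
   i.e. only one operand needs to be masked, because tst ands the two
   operands and the unwanted high bits are already cleared in r1.  */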

/* A helper class to increment/decrement a counter variable each time a
   function is entered/left.  */
class scope_counter
{
public:
  scope_counter (int& counter) : m_counter (counter) { ++m_counter; }

  ~scope_counter (void)
  {
    --m_counter;
    gcc_assert (m_counter >= 0);
  }

  int count (void) const { return m_counter; }

private:
  int& m_counter;
};

/* Given an rtx x, determine whether the expression can be used to create
   an insn that calculates x and stores the result in the T bit.
   This is used by the 'treg_set_expr' predicate to construct insn sequences
   where T bit results are fed into other insns, such as addc, subc, negc
   insns.

   FIXME: The patterns that expand 'treg_set_expr' operands tend to
   distinguish between 'positive' and 'negative' forms.  For now this has to
   be done in the preparation code.  We could also introduce
   'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
   two different patterns for the 'positive' and 'negative' forms.  However,
   the total amount of lines of code seems to be about the same and the
   '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
   recog function would need to look inside the expression by temporarily
   splitting it.  */
static int sh_recog_treg_set_expr_reent_count = 0;

bool
sh_recog_treg_set_expr (rtx op, machine_mode mode)
{
  scope_counter recursion (sh_recog_treg_set_expr_reent_count);

  /* Limit the recursion count to avoid nested expressions which we can't
     resolve to a single treg set insn.  */
  if (recursion.count () > 1)
    return false;

  /* Early accept known possible operands before doing recog.  */
  if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode))
    return true;

  /* Early reject impossible operands before doing recog.
     There are some (set ((t) (subreg ...))) patterns, but we must be careful
     not to allow any invalid reg-reg or mem-reg moves, or else other passes
     such as lower-subreg will bail out.  Some insns such as SH4A movua are
     done with UNSPEC, so must reject those, too, or else it would result
     in an invalid reg -> treg move.  */
  if (register_operand (op, mode) || memory_operand (op, mode)
      || sh_unspec_insn_p (op))
    return false;

  if (!can_create_pseudo_p ())
    return false;

  /* expand_debug_locations may call this to compute rtx costs at
     very early stage.  In that case, don't make new insns here to
     avoid codegen differences with -g.  */
  if (currently_expanding_to_rtl)
    return false;

  /* We are going to invoke recog in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), op));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  int result = recog (PATTERN (i), i, 0);

  /* It seems there is no insn like that.  Create a simple negated
     version and try again.  If we hit a negated form, we'll allow that
     and append a nott sequence when splitting out the insns.  Insns that
     do the split can then remove the trailing nott if they know how to
     deal with it.  */
  if (result < 0 && GET_CODE (op) == EQ)
    {
      PUT_CODE (op, NE);
      result = recog (PATTERN (i), i, 0);
      PUT_CODE (op, EQ);
    }
  if (result < 0 && GET_CODE (op) == NE)
    {
      PUT_CODE (op, EQ);
      result = recog (PATTERN (i), i, 0);
      PUT_CODE (op, NE);
    }

  recog_data = prev_recog_data;
  return result >= 0;
}

/* Returns true when recog of a 'treg_set_expr' is currently in progress.
   This can be used as a condition for insn/split patterns to allow certain
   T bit setting patterns only to be matched as sub expressions of other
   patterns.  */
bool
sh_in_recog_treg_set_expr (void)
{
  return sh_recog_treg_set_expr_reent_count > 0;
}
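
/* Illustrative use (an added example; the operand numbers are made up):
   an addc expander can accept a 'treg_set_expr' operand, e.g.

     (set (match_operand:SI 0 "arith_reg_dest")
	  (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand")
			    (match_operand:SI 2 "arith_reg_operand"))
		   (match_operand:SI 3 "treg_set_expr")))

   sh_recog_treg_set_expr then decides whether operand 3 (for instance a
   comparison) can be computed into the T bit by some existing insn, and
   sh_split_treg_set_expr below emits that computation.  */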

/* Given an rtx x, which is assumed to be some expression that has been
   matched by the 'treg_set_expr' predicate before, split and emit the
   insns that are necessary to calculate the expression and store the result
   in the T bit.
   The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
   Unfortunately we can't use 'try_split' here directly, as it tries to invoke
   'delete_insn' which then causes the DF parts to bail out, because we
   currently are inside another gen_split* function and would invoke
   'try_split' in a reentrant way.  */
static std::pair<rtx_insn*, rtx_insn*>
sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
{
  if (dump_file)
    {
      fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
      print_rtl_single (dump_file, i);
      fprintf (dump_file, "\n");
    }

  rtx_insn* seq = safe_as_a<rtx_insn*> (split_insns (PATTERN (i), curr_insn));

  if (seq == NULL)
    return std::make_pair (i, i);

  /* Avoid infinite splitter loops if any insn of the result matches
     the original pattern.  */
  for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
    if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
      return std::make_pair (i, i);

  unshare_all_rtl_in_chain (seq);

  /* 'seq' is now a replacement for 'i'.  Assuming that 'i' is an insn in
     a linked list, replace the single insn with the new insns.  */
  rtx_insn* seqlast = seq;
  while (NEXT_INSN (seqlast) != NULL)
    seqlast = NEXT_INSN (seqlast);

  if (rtx_insn* iprev = PREV_INSN (i))
    SET_NEXT_INSN (iprev) = seq;
  if (rtx_insn* inext = NEXT_INSN (i))
    SET_PREV_INSN (inext) = seqlast;

  SET_PREV_INSN (seq) = PREV_INSN (i);
  SET_NEXT_INSN (seqlast) = NEXT_INSN (i);

  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  /* Recursively split all insns.  */
  for (i = seq; ; i = NEXT_INSN (i))
    {
      std::pair<rtx_insn*, rtx_insn*> ii =
	  sh_try_split_insn_simple (i, curr_insn, n + 1);
      if (i == seq)
	seq = ii.first;
      if (i == seqlast)
	{
	  seqlast = ii.second;
	  break;
	}
      i = ii.first;
    }

  return std::make_pair (seq, seqlast);
}

sh_treg_insns
sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
{
  if (t_reg_operand (x, VOIDmode))
    return sh_treg_insns ();

  scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);

  rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), x));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insn:\n");
      print_rtl (dump_file, i);
      fprintf (dump_file, "\n");
    }

  /* We are going to invoke recog/split_insns in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  int insn_code = recog (PATTERN (i), i, 0);

  /* If the insn was not found, see if we matched the negated form before
     and append a nott.  */
  bool append_nott = false;

  if (insn_code < 0 && GET_CODE (x) == EQ)
    {
      PUT_CODE (x, NE);
      insn_code = recog (PATTERN (i), i, 0);
      if (insn_code >= 0)
	append_nott = true;
      else
	PUT_CODE (x, EQ);
    }
  if (insn_code < 0 && GET_CODE (x) == NE)
    {
      PUT_CODE (x, EQ);
      insn_code = recog (PATTERN (i), i, 0);
      if (insn_code >= 0)
	append_nott = true;
      else
	PUT_CODE (x, NE);
    }

  gcc_assert (insn_code >= 0);

  /* Try to recursively split the insn.  Some insns might refuse to split
     any further while we are in the treg_set_expr splitting phase.  They
     will be emitted as part of the outer insn and then split again.  */
  std::pair<rtx_insn*, rtx_insn*> insnlist =
	sh_try_split_insn_simple (i, curr_insn);

  /* Restore recog state.  */
  recog_data = prev_recog_data;

  rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
			? insnlist.second
			: NULL;
  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insnlist:\n");
      print_rtl (dump_file, insnlist.first);
      fprintf (dump_file, "\n");

      if (nott_insn != NULL)
	fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
    }

  emit_insn (insnlist.first);

  if (nott_insn != NULL && append_nott)
    {
      if (dump_file)
	fprintf (dump_file, "removing trailing nott\n");
      remove_insn (nott_insn);
      nott_insn = NULL;
      append_nott = false;
    }

  if (append_nott)
    nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));

  rtx_insn* first_insn = get_insns ();

  if (dump_file)
    {
      fprintf (dump_file, "resulting insns:\n");
      print_rtl (dump_file, first_insn);
      fprintf (dump_file, "\n");
    }

  return sh_treg_insns (first_insn, nott_insn);
}
sh_treg_insns
sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
{
  if (t_reg_operand (x, VOIDmode))
    return sh_treg_insns ();

  scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);

  rtx_insn* i = make_insn_raw (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), x));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insn:\n");
      print_rtl (dump_file, i);
      fprintf (dump_file, "\n");
    }

  /* We are going to invoke recog/split_insns in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  int insn_code = recog (PATTERN (i), i, 0);

  /* If the insn was not found, see if we matched the negated form before
     and append a nott.  */
  bool append_nott = false;

  if (insn_code < 0 && GET_CODE (x) == EQ)
    {
      PUT_CODE (x, NE);
      insn_code = recog (PATTERN (i), i, 0);
      if (insn_code >= 0)
	append_nott = true;
      else
	PUT_CODE (x, EQ);
    }
  if (insn_code < 0 && GET_CODE (x) == NE)
    {
      PUT_CODE (x, EQ);
      insn_code = recog (PATTERN (i), i, 0);
      if (insn_code >= 0)
	append_nott = true;
      else
	PUT_CODE (x, NE);
    }

  gcc_assert (insn_code >= 0);

  /* Try to recursively split the insn.  Some insns might refuse to split
     any further while we are in the treg_set_expr splitting phase.  They
     will be emitted as part of the outer insn and then split again.  */
  std::pair<rtx_insn*, rtx_insn*> insnlist =
      sh_try_split_insn_simple (i, curr_insn);

  /* Restore recog state.  */
  recog_data = prev_recog_data;

  rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
			? insnlist.second
			: NULL;
  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insnlist:\n");
      print_rtl (dump_file, insnlist.first);
      fprintf (dump_file, "\n");

      if (nott_insn != NULL)
	fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
    }

  emit_insn (insnlist.first);

  if (nott_insn != NULL && append_nott)
    {
      if (dump_file)
	fprintf (dump_file, "removing trailing nott\n");
      remove_insn (nott_insn);
      nott_insn = NULL;
      append_nott = false;
    }

  if (append_nott)
    nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));

  rtx_insn* first_insn = get_insns ();

  if (dump_file)
    {
      fprintf (dump_file, "resulting insns:\n");
      print_rtl (dump_file, first_insn);
      fprintf (dump_file, "\n");
    }

  return sh_treg_insns (first_insn, nott_insn);
}

/*------------------------------------------------------------------------------
  Mode switching support code.
*/

static void
sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
		  int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  if ((TARGET_SH4A_FP || TARGET_SH4_300)
      && prev_mode != FP_MODE_NONE && prev_mode != mode)
    {
      emit_insn (gen_toggle_pr ());
      if (TARGET_FMOVD)
	emit_insn (gen_toggle_sz ());
    }
  else if (mode != FP_MODE_NONE)
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_sts_fpscr (tmp));
      rtx i = NULL;

      const unsigned HOST_WIDE_INT fpbits =
	  TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;

      if (prev_mode != FP_MODE_NONE && prev_mode != mode)
	i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else if (mode == FP_MODE_SINGLE)
	i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
      else if (mode == FP_MODE_DOUBLE)
	i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else
	gcc_unreachable ();

      emit_insn (i);
      emit_insn (gen_lds_fpscr (tmp));
    }
}

static int
sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
}

static int
sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
{
  if (TARGET_HITACHI && recog_memoized (insn) >= 0
      && get_attr_fp_set (insn) != FP_SET_NONE)
    return (int) get_attr_fp_set (insn);
  else
    return mode;
}

static int
sh_mode_entry (int entity ATTRIBUTE_UNUSED)
{
  return NORMAL_MODE (entity);
}

static int
sh_mode_exit (int entity ATTRIBUTE_UNUSED)
{
  return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
}

static int
sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
{
  return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
}
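/* For illustration, a hypothetical FP_MODE_SINGLE -> FP_MODE_DOUBLE switch
   on a non-SH4A target with TARGET_FMOVD makes sh_emit_mode_set emit
   roughly the following sequence (tmp is a fresh pseudo; the FPSCR bit
   mask is first loaded into a register via force_reg):

     (set (reg:SI tmp) (reg:SI FPSCR_REG))		;; gen_sts_fpscr
     (set (reg:SI tmp) (xor:SI (reg:SI tmp)
			       (reg:SI <FPSCR_PR|FPSCR_SZ>)))
     (set (reg:SI FPSCR_REG) (reg:SI tmp))		;; gen_lds_fpscr

   SH4A / SH4-300 targets use the dedicated toggle_pr / toggle_sz insns
   instead and avoid reading FPSCR back into a general register.  */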
/*------------------------------------------------------------------------------
  Misc
*/

/* Return true if we use LRA instead of reload pass.  */
bool
sh_lra_p (void)
{
  return sh_lra_flag;
}

/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
				   unsigned int align,
				   enum by_pieces_operation op,
				   bool speed_p)
{
  switch (op)
    {
      case MOVE_BY_PIECES:
	return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
	       < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
      case STORE_BY_PIECES:
      case SET_BY_PIECES:
	return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
	       < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
      default:
	return default_use_by_pieces_infrastructure_p (size, align,
						       op, speed_p);
    }
}

#include "gt-sh.h"