/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.
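   For example (illustrative, not taken from the machine description):
   an SFmode result computed by OUT_INSN can feed an SFmode fp store
   through the bypass, while an SFmode result feeding a DFmode store
   cannot, since the check below requires equal mode sizes.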
   */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.
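   (Illustrative: the plain "bl" branch has a 17-bit word displacement,
   roughly +/-256 KB of reach, so once the code subspace grows past what
   such a branch can span, calls may need the long-call sequence.)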
   */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */
static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
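/* Worked examples for the predicates above, with illustrative values:

   pa_zdepi_cint_p (0x78): 1111000 is a run of four ones shifted left
   three, i.e. the 5-bit immediate -1 deposited in a 4-bit field at
   position 3, so zdepi can generate it.  The algorithm computes
   lsb_mask = 0x8 and t = ((0x78 >> 4) + 0x8) & ~0x7 = 0x8, a power of
   two.  By contrast, 0x2b (101011) is not the sign extension of any
   5-bit value, and the algorithm yields t = 3, not a power of two.

   pa_and_mask_p, using 8-bit values for brevity: 11100111 is accepted
   since ~mask = 00011000 plus its least significant bit is 00100000,
   a power of two; 01011111 is rejected since ~mask = 10100000 plus
   its least significant bit is 11000000.  */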
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)
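   As a worked example with illustrative numbers: for a MODE_INT
   reference to X + 0x7f00, mask is 0x3fff and (0x7f00 & 0x3fff)
   = 0x3f00, which is >= halfway (0x2000), so we round up: Y = 0x8000,
   Z = X + 0x8000, and the reference becomes memory (Z + (-0x100)),
   whose displacement easily fits in 14 bits.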
   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (This allows more shadd insns and shifted
   indexed addressing modes to be used.)

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~mask) + mask + 1;
      else
        newoffset = (offset & ~mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
             (plus (mult (reg) (shadd_const))
                   (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by
             shadd_const, it is divided and added to (reg), which
             allows more scaled indexed addresses.
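             (E.g., with shadd_const 8 and a const_int of 4096, the
             second case below adds 4096/8 = 512 to the register and
             keeps a scaled index off the symbol; illustrative
             numbers.)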
             */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+D and LO_SUM addresses cost 1, addresses
   involving symbolic constants (HIGH) cost 2, and everything else
   costs 4.

   PIC addresses are very expensive.
   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
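/* Example of the mode-size scaling above, with illustrative numbers:
   a DImode multiply has factor 8/4 = 2, so when the FP unit can be
   used it is costed at 2 * 2 * COSTS_N_INSNS (8) = COSTS_N_INSNS (32),
   four times the SImode cost.  */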
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp, true);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases, and reloads for other unsupported
     memory operands.  */
  if (scratch_reg
      && FP_REG_P (operand0)
      && (MEM_P (operand1)
          || (GET_CODE (operand1) == SUBREG
              && MEM_P (XEXP (operand1, 0)))))
    {
      rtx op1 = operand1;

      if (GET_CODE (op1) == SUBREG)
        op1 = XEXP (op1, 0);

      if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.  */
              if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op1, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op1, 0));
              emit_insn (gen_rtx_SET (VOIDmode, operand0,
                                      replace_equiv_address (op1, scratch_reg)));
              return 1;
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
               || IS_INDEX_ADDR_P (XEXP (op1, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
          scratch_reg = force_mode (word_mode, scratch_reg);
          emit_move_insn (scratch_reg, XEXP (op1, 0));
          emit_insn (gen_rtx_SET (VOIDmode, operand0,
                                  replace_equiv_address (op1, scratch_reg)));
          return 1;
        }
    }
  else if (scratch_reg
           && FP_REG_P (operand1)
           && (MEM_P (operand0)
               || (GET_CODE (operand0) == SUBREG
                   && MEM_P (XEXP (operand0, 0)))))
    {
      rtx op0 = operand0;

      if (GET_CODE (op0) == SUBREG)
        op0 = XEXP (op0, 0);

      if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
        {
          if (!(TARGET_PA_20
                && !TARGET_ELF32
                && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
              && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
            {
              /* SCRATCH_REG will hold an address and maybe the actual data.
                 We want it in WORD_MODE regardless of what mode it was
                 originally given to us.  */
              scratch_reg = force_mode (word_mode, scratch_reg);

              /* D might not fit in 14 bits either; for such cases load D
                 into scratch reg.
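                 (For instance, a displacement of 0x5000 is outside the
                 signed 14-bit range of -8192..8191, so it is loaded into
                 SCRATCH_REG and added to the base; illustrative value.)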
                 */
              if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
                {
                  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
                  emit_move_insn (scratch_reg,
                                  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
                                                  Pmode,
                                                  XEXP (XEXP (op0, 0), 0),
                                                  scratch_reg));
                }
              else
                emit_move_insn (scratch_reg, XEXP (op0, 0));
              emit_insn (gen_rtx_SET (VOIDmode,
                                      replace_equiv_address (op0, scratch_reg),
                                      operand1));
              return 1;
            }
        }
      else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
               || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
               || IS_INDEX_ADDR_P (XEXP (op0, 0)))
        {
          /* Load memory address into SCRATCH_REG.  */
          emit_move_insn (scratch_reg, XEXP (op0, 0));
          emit_insn (gen_rtx_SET (VOIDmode,
                                  replace_equiv_address (op0, scratch_reg),
                                  operand1));
          return 1;
        }
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.  For the most part,
     this is only necessary for SImode and DImode.

     Use scratch_reg to hold the address of the memory location.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && FP_REG_P (operand0))
    {
      rtx const_mem, xoperands[2];

      if (operand1 == CONST0_RTX (mode))
        {
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      pa_emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));

          /* Now we are going to load the scratch register from memory,
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
*/ 1841 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 1842 1843 emit_move_insn (scratch_reg, 1844 replace_equiv_address (operand1, scratch_reg)); 1845 } 1846 else 1847 { 1848 /* We want to load the scratch register using the same mode as 1849 the ultimate destination. */ 1850 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg); 1851 1852 emit_move_insn (scratch_reg, operand1); 1853 } 1854 1855 /* And emit the insn to set the ultimate destination. We know that 1856 the scratch register has the same mode as the destination at this 1857 point. */ 1858 emit_move_insn (operand0, scratch_reg); 1859 return 1; 1860 } 1861 1862 /* Handle the most common case: storing into a register. */ 1863 if (register_operand (operand0, mode)) 1864 { 1865 /* Legitimize TLS symbol references. This happens for references 1866 that aren't a legitimate constant. */ 1867 if (PA_SYMBOL_REF_TLS_P (operand1)) 1868 operand1 = legitimize_tls_address (operand1); 1869 1870 if (register_operand (operand1, mode) 1871 || (GET_CODE (operand1) == CONST_INT 1872 && pa_cint_ok_for_move (INTVAL (operand1))) 1873 || (operand1 == CONST0_RTX (mode)) 1874 || (GET_CODE (operand1) == HIGH 1875 && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) 1876 /* Only `general_operands' can come here, so MEM is ok. */ 1877 || GET_CODE (operand1) == MEM) 1878 { 1879 /* Various sets are created during RTL generation which don't 1880 have the REG_POINTER flag correctly set. After the CSE pass, 1881 instruction recognition can fail if we don't consistently 1882 set this flag when performing register copies. This should 1883 also improve the opportunities for creating insns that use 1884 unscaled indexing. */ 1885 if (REG_P (operand0) && REG_P (operand1)) 1886 { 1887 if (REG_POINTER (operand1) 1888 && !REG_POINTER (operand0) 1889 && !HARD_REGISTER_P (operand0)) 1890 copy_reg_pointer (operand0, operand1); 1891 } 1892 1893 /* When MEMs are broken out, the REG_POINTER flag doesn't 1894 get set. In some cases, we can set the REG_POINTER flag 1895 from the declaration for the MEM. */ 1896 if (REG_P (operand0) 1897 && GET_CODE (operand1) == MEM 1898 && !REG_POINTER (operand0)) 1899 { 1900 tree decl = MEM_EXPR (operand1); 1901 1902 /* Set the register pointer flag and register alignment 1903 if the declaration for this memory reference is a 1904 pointer type. */ 1905 if (decl) 1906 { 1907 tree type; 1908 1909 /* If this is a COMPONENT_REF, use the FIELD_DECL from 1910 tree operand 1. */ 1911 if (TREE_CODE (decl) == COMPONENT_REF) 1912 decl = TREE_OPERAND (decl, 1); 1913 1914 type = TREE_TYPE (decl); 1915 type = strip_array_types (type); 1916 1917 if (POINTER_TYPE_P (type)) 1918 { 1919 int align; 1920 1921 type = TREE_TYPE (type); 1922 /* Using TYPE_ALIGN_OK is rather conservative as 1923 only the ada frontend actually sets it. */ 1924 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type) 1925 : BITS_PER_UNIT); 1926 mark_reg_pointer (operand0, align); 1927 } 1928 } 1929 } 1930 1931 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); 1932 return 1; 1933 } 1934 } 1935 else if (GET_CODE (operand0) == MEM) 1936 { 1937 if (mode == DFmode && operand1 == CONST0_RTX (mode) 1938 && !(reload_in_progress || reload_completed)) 1939 { 1940 rtx temp = gen_reg_rtx (DFmode); 1941 1942 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); 1943 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); 1944 return 1; 1945 } 1946 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) 1947 { 1948 /* Run this case quickly. 
*/ 1949 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); 1950 return 1; 1951 } 1952 if (! (reload_in_progress || reload_completed)) 1953 { 1954 operands[0] = validize_mem (operand0); 1955 operands[1] = operand1 = force_reg (mode, operand1); 1956 } 1957 } 1958 1959 /* Simplify the source if we need to. 1960 Note we do have to handle function labels here, even though we do 1961 not consider them legitimate constants. Loop optimizations can 1962 call the emit_move_xxx with one as a source. */ 1963 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) 1964 || (GET_CODE (operand1) == HIGH 1965 && symbolic_operand (XEXP (operand1, 0), mode)) 1966 || function_label_operand (operand1, VOIDmode) 1967 || tls_referenced_p (operand1)) 1968 { 1969 int ishighonly = 0; 1970 1971 if (GET_CODE (operand1) == HIGH) 1972 { 1973 ishighonly = 1; 1974 operand1 = XEXP (operand1, 0); 1975 } 1976 if (symbolic_operand (operand1, mode)) 1977 { 1978 /* Argh. The assembler and linker can't handle arithmetic 1979 involving plabels. 1980 1981 So we force the plabel into memory, load operand0 from 1982 the memory location, then add in the constant part. */ 1983 if ((GET_CODE (operand1) == CONST 1984 && GET_CODE (XEXP (operand1, 0)) == PLUS 1985 && function_label_operand (XEXP (XEXP (operand1, 0), 0), 1986 VOIDmode)) 1987 || function_label_operand (operand1, VOIDmode)) 1988 { 1989 rtx temp, const_part; 1990 1991 /* Figure out what (if any) scratch register to use. */ 1992 if (reload_in_progress || reload_completed) 1993 { 1994 scratch_reg = scratch_reg ? scratch_reg : operand0; 1995 /* SCRATCH_REG will hold an address and maybe the actual 1996 data. We want it in WORD_MODE regardless of what mode it 1997 was originally given to us. */ 1998 scratch_reg = force_mode (word_mode, scratch_reg); 1999 } 2000 else if (flag_pic) 2001 scratch_reg = gen_reg_rtx (Pmode); 2002 2003 if (GET_CODE (operand1) == CONST) 2004 { 2005 /* Save away the constant part of the expression. */ 2006 const_part = XEXP (XEXP (operand1, 0), 1); 2007 gcc_assert (GET_CODE (const_part) == CONST_INT); 2008 2009 /* Force the function label into memory. */ 2010 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); 2011 } 2012 else 2013 { 2014 /* No constant part. */ 2015 const_part = NULL_RTX; 2016 2017 /* Force the function label into memory. */ 2018 temp = force_const_mem (mode, operand1); 2019 } 2020 2021 2022 /* Get the address of the memory location. PIC-ify it if 2023 necessary. */ 2024 temp = XEXP (temp, 0); 2025 if (flag_pic) 2026 temp = legitimize_pic_address (temp, mode, scratch_reg); 2027 2028 /* Put the address of the memory location into our destination 2029 register. */ 2030 operands[1] = temp; 2031 pa_emit_move_sequence (operands, mode, scratch_reg); 2032 2033 /* Now load from the memory location into our destination 2034 register. */ 2035 operands[1] = gen_rtx_MEM (Pmode, operands[0]); 2036 pa_emit_move_sequence (operands, mode, scratch_reg); 2037 2038 /* And add back in the constant part. */ 2039 if (const_part != NULL_RTX) 2040 expand_inc (operand0, const_part); 2041 2042 return 1; 2043 } 2044 2045 if (flag_pic) 2046 { 2047 rtx_insn *insn; 2048 rtx temp; 2049 2050 if (reload_in_progress || reload_completed) 2051 { 2052 temp = scratch_reg ? scratch_reg : operand0; 2053 /* TEMP will hold an address and maybe the actual 2054 data. We want it in WORD_MODE regardless of what mode it 2055 was originally given to us. 
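(For instance, a scratch register handed to us in SImode on a 64-bit target could not hold a full Pmode address; word_mode is DImode there.)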
*/ 2056 temp = force_mode (word_mode, temp); 2057 } 2058 else 2059 temp = gen_reg_rtx (Pmode); 2060 2061 /* Force (const (plus (symbol) (const_int))) to memory 2062 if the const_int will not fit in 14 bits. Although 2063 this requires a relocation, the instruction sequence 2064 needed to load the value is shorter. */ 2065 if (GET_CODE (operand1) == CONST 2066 && GET_CODE (XEXP (operand1, 0)) == PLUS 2067 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT 2068 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))) 2069 { 2070 rtx x, m = force_const_mem (mode, operand1); 2071 2072 x = legitimize_pic_address (XEXP (m, 0), mode, temp); 2073 x = replace_equiv_address (m, x); 2074 insn = emit_move_insn (operand0, x); 2075 } 2076 else 2077 { 2078 operands[1] = legitimize_pic_address (operand1, mode, temp); 2079 if (REG_P (operand0) && REG_P (operands[1])) 2080 copy_reg_pointer (operand0, operands[1]); 2081 insn = emit_move_insn (operand0, operands[1]); 2082 } 2083 2084 /* Put a REG_EQUAL note on this insn. */ 2085 set_unique_reg_note (insn, REG_EQUAL, operand1); 2086 } 2087 /* On the HPPA, references to data space are supposed to use dp, 2088 register 27, but showing it in the RTL inhibits various cse 2089 and loop optimizations. */ 2090 else 2091 { 2092 rtx temp, set; 2093 2094 if (reload_in_progress || reload_completed) 2095 { 2096 temp = scratch_reg ? scratch_reg : operand0; 2097 /* TEMP will hold an address and maybe the actual 2098 data. We want it in WORD_MODE regardless of what mode it 2099 was originally given to us. */ 2100 temp = force_mode (word_mode, temp); 2101 } 2102 else 2103 temp = gen_reg_rtx (mode); 2104 2105 /* Loading a SYMBOL_REF into a register makes that register 2106 safe to be used as the base in an indexed address. 2107 2108 Don't mark hard registers though. That loses. */ 2109 if (GET_CODE (operand0) == REG 2110 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) 2111 mark_reg_pointer (operand0, BITS_PER_UNIT); 2112 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) 2113 mark_reg_pointer (temp, BITS_PER_UNIT); 2114 2115 if (ishighonly) 2116 set = gen_rtx_SET (mode, operand0, temp); 2117 else 2118 set = gen_rtx_SET (VOIDmode, 2119 operand0, 2120 gen_rtx_LO_SUM (mode, temp, operand1)); 2121 2122 emit_insn (gen_rtx_SET (VOIDmode, 2123 temp, 2124 gen_rtx_HIGH (mode, operand1))); 2125 emit_insn (set); 2126 2127 } 2128 return 1; 2129 } 2130 else if (tls_referenced_p (operand1)) 2131 { 2132 rtx tmp = operand1; 2133 rtx addend = NULL; 2134 2135 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 2136 { 2137 addend = XEXP (XEXP (tmp, 0), 1); 2138 tmp = XEXP (XEXP (tmp, 0), 0); 2139 } 2140 2141 gcc_assert (GET_CODE (tmp) == SYMBOL_REF); 2142 tmp = legitimize_tls_address (tmp); 2143 if (addend) 2144 { 2145 tmp = gen_rtx_PLUS (mode, tmp, addend); 2146 tmp = force_operand (tmp, operands[0]); 2147 } 2148 operands[1] = tmp; 2149 } 2150 else if (GET_CODE (operand1) != CONST_INT 2151 || !pa_cint_ok_for_move (INTVAL (operand1))) 2152 { 2153 rtx temp; 2154 rtx_insn *insn; 2155 rtx op1 = operand1; 2156 HOST_WIDE_INT value = 0; 2157 HOST_WIDE_INT insv = 0; 2158 int insert = 0; 2159 2160 if (GET_CODE (operand1) == CONST_INT) 2161 value = INTVAL (operand1); 2162 2163 if (TARGET_64BIT 2164 && GET_CODE (operand1) == CONST_INT 2165 && HOST_BITS_PER_WIDE_INT > 32 2166 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32) 2167 { 2168 HOST_WIDE_INT nval; 2169 2170 /* Extract the low order 32 bits of the value and sign extend. 
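For example, the 64-bit constant 0x123456789 yields the sign-extended low part 0x23456789, leaving the high-order bits (here just 1) to be inserted afterwards.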
2171 If the new value is the same as the original value, we can 2172 use the original value as-is. If the new value is 2173 different, we use it and insert the most-significant 32-bits 2174 of the original value into the final result. */ 2175 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1)) 2176 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31); 2177 if (value != nval) 2178 { 2179#if HOST_BITS_PER_WIDE_INT > 32 2180 insv = value >= 0 ? value >> 32 : ~(~value >> 32); 2181#endif 2182 insert = 1; 2183 value = nval; 2184 operand1 = GEN_INT (nval); 2185 } 2186 } 2187 2188 if (reload_in_progress || reload_completed) 2189 temp = scratch_reg ? scratch_reg : operand0; 2190 else 2191 temp = gen_reg_rtx (mode); 2192 2193 /* We don't directly split DImode constants on 32-bit targets 2194 because PLUS uses an 11-bit immediate and the insn sequence 2195 generated is not as efficient as the one using HIGH/LO_SUM. */ 2196 if (GET_CODE (operand1) == CONST_INT 2197 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD 2198 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT 2199 && !insert) 2200 { 2201 /* Directly break constant into high and low parts. This 2202 provides better optimization opportunities because various 2203 passes recognize constants split with PLUS but not LO_SUM. 2204 We use a 14-bit signed low part except when the addition 2205 of 0x4000 to the high part might change the sign of the 2206 high part. */ 2207 HOST_WIDE_INT low = value & 0x3fff; 2208 HOST_WIDE_INT high = value & ~ 0x3fff; 2209 2210 if (low >= 0x2000) 2211 { 2212 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000)) 2213 high += 0x2000; 2214 else 2215 high += 0x4000; 2216 } 2217 2218 low = value - high; 2219 2220 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high))); 2221 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low)); 2222 } 2223 else 2224 { 2225 emit_insn (gen_rtx_SET (VOIDmode, temp, 2226 gen_rtx_HIGH (mode, operand1))); 2227 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); 2228 } 2229 2230 insn = emit_move_insn (operands[0], operands[1]); 2231 2232 /* Now insert the most significant 32 bits of the value 2233 into the register. When we don't have a second register 2234 available, it could take up to nine instructions to load 2235 a 64-bit integer constant. Prior to reload, we force 2236 constants that would take more than three instructions 2237 to load to the constant pool. During and after reload, 2238 we have to handle all possible values. */ 2239 if (insert) 2240 { 2241 /* Use a HIGH/LO_SUM/INSV sequence if we have a second 2242 register and the value to be inserted is outside the 2243 range that can be loaded with three depdi instructions. */ 2244 if (temp != operand0 && (insv >= 16384 || insv < -16384)) 2245 { 2246 operand1 = GEN_INT (insv); 2247 2248 emit_insn (gen_rtx_SET (VOIDmode, temp, 2249 gen_rtx_HIGH (mode, operand1))); 2250 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1)); 2251 if (mode == DImode) 2252 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32), 2253 const0_rtx, temp)); 2254 else 2255 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32), 2256 const0_rtx, temp)); 2257 } 2258 else 2259 { 2260 int len = 5, pos = 27; 2261 2262 /* Insert the bits using the depdi instruction. */ 2263 while (pos >= 0) 2264 { 2265 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16; 2266 HOST_WIDE_INT sign = v5 < 0; 2267 2268 /* Left extend the insertion. */ 2269 insv = (insv >= 0 ? insv >> len : ~(~insv >> len)); 2270 while (pos > 0 && (insv & 1) == sign) 2271 { 2272 insv = (insv >= 0 ?
insv >> 1 : ~(~insv >> 1)); 2273 len += 1; 2274 pos -= 1; 2275 } 2276 2277 if (mode == DImode) 2278 insn = emit_insn (gen_insvdi (operand0, 2279 GEN_INT (len), 2280 GEN_INT (pos), 2281 GEN_INT (v5))); 2282 else 2283 insn = emit_insn (gen_insvsi (operand0, 2284 GEN_INT (len), 2285 GEN_INT (pos), 2286 GEN_INT (v5))); 2287 2288 len = pos > 0 && pos < 5 ? pos : 5; 2289 pos -= len; 2290 } 2291 } 2292 } 2293 2294 set_unique_reg_note (insn, REG_EQUAL, op1); 2295 2296 return 1; 2297 } 2298 } 2299 /* Now have insn-emit do whatever it normally does. */ 2300 return 0; 2301} 2302 2303/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning 2304 it will need a link/runtime reloc). */ 2305 2306int 2307pa_reloc_needed (tree exp) 2308{ 2309 int reloc = 0; 2310 2311 switch (TREE_CODE (exp)) 2312 { 2313 case ADDR_EXPR: 2314 return 1; 2315 2316 case POINTER_PLUS_EXPR: 2317 case PLUS_EXPR: 2318 case MINUS_EXPR: 2319 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2320 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1)); 2321 break; 2322 2323 CASE_CONVERT: 2324 case NON_LVALUE_EXPR: 2325 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0)); 2326 break; 2327 2328 case CONSTRUCTOR: 2329 { 2330 tree value; 2331 unsigned HOST_WIDE_INT ix; 2332 2333 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value) 2334 if (value) 2335 reloc |= pa_reloc_needed (value); 2336 } 2337 break; 2338 2339 case ERROR_MARK: 2340 break; 2341 2342 default: 2343 break; 2344 } 2345 return reloc; 2346} 2347 2348 2349/* Return the best assembler insn template 2350 for moving operands[1] into operands[0] as a fullword. */ 2351const char * 2352pa_singlemove_string (rtx *operands) 2353{ 2354 HOST_WIDE_INT intval; 2355 2356 if (GET_CODE (operands[0]) == MEM) 2357 return "stw %r1,%0"; 2358 if (GET_CODE (operands[1]) == MEM) 2359 return "ldw %1,%0"; 2360 if (GET_CODE (operands[1]) == CONST_DOUBLE) 2361 { 2362 long i; 2363 REAL_VALUE_TYPE d; 2364 2365 gcc_assert (GET_MODE (operands[1]) == SFmode); 2366 2367 /* Translate the CONST_DOUBLE to a CONST_INT with the same target 2368 bit pattern. */ 2369 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]); 2370 REAL_VALUE_TO_TARGET_SINGLE (d, i); 2371 2372 operands[1] = GEN_INT (i); 2373 /* Fall through to CONST_INT case. */ 2374 } 2375 if (GET_CODE (operands[1]) == CONST_INT) 2376 { 2377 intval = INTVAL (operands[1]); 2378 2379 if (VAL_14_BITS_P (intval)) 2380 return "ldi %1,%0"; 2381 else if ((intval & 0x7ff) == 0) 2382 return "ldil L'%1,%0"; 2383 else if (pa_zdepi_cint_p (intval)) 2384 return "{zdepi %Z1,%0|depwi,z %Z1,%0}"; 2385 else 2386 return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; 2387 } 2388 return "copy %1,%0"; 2389} 2390 2391 2392/* Compute position (in OP[1]) and width (in OP[2]) 2393 useful for copying IMM to a register using the zdepi 2394 instructions. Store the immediate value to insert in OP[0]. */ 2395static void 2396compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2397{ 2398 int lsb, len; 2399 2400 /* Find the least significant set bit in IMM. */ 2401 for (lsb = 0; lsb < 32; lsb++) 2402 { 2403 if ((imm & 1) != 0) 2404 break; 2405 imm >>= 1; 2406 } 2407 2408 /* Choose variants based on *sign* of the 5-bit field. */ 2409 if ((imm & 0x10) == 0) 2410 len = (lsb <= 28) ? 4 : 32 - lsb; 2411 else 2412 { 2413 /* Find the width of the bitstring in IMM. */ 2414 for (len = 5; len < 32 - lsb; len++) 2415 { 2416 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2417 break; 2418 } 2419 2420 /* Sign extend IMM as a 5-bit value. 
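For example, IMM = 0x00ff0000 gives LSB = 16 and LEN = 8; its 5-bit low field sign extends to -1, so the operands describe a zdepi of -1 at position 15 with length 8.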
*/ 2421 imm = (imm & 0xf) - 0x10; 2422 } 2423 2424 op[0] = imm; 2425 op[1] = 31 - lsb; 2426 op[2] = len; 2427} 2428 2429/* Compute position (in OP[1]) and width (in OP[2]) 2430 useful for copying IMM to a register using the depdi,z 2431 instructions. Store the immediate value to insert in OP[0]. */ 2432 2433static void 2434compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op) 2435{ 2436 int lsb, len, maxlen; 2437 2438 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64); 2439 2440 /* Find the least significant set bit in IMM. */ 2441 for (lsb = 0; lsb < maxlen; lsb++) 2442 { 2443 if ((imm & 1) != 0) 2444 break; 2445 imm >>= 1; 2446 } 2447 2448 /* Choose variants based on *sign* of the 5-bit field. */ 2449 if ((imm & 0x10) == 0) 2450 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb; 2451 else 2452 { 2453 /* Find the width of the bitstring in IMM. */ 2454 for (len = 5; len < maxlen - lsb; len++) 2455 { 2456 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) 2457 break; 2458 } 2459 2460 /* Extend length if host is narrow and IMM is negative. */ 2461 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb) 2462 len += 32; 2463 2464 /* Sign extend IMM as a 5-bit value. */ 2465 imm = (imm & 0xf) - 0x10; 2466 } 2467 2468 op[0] = imm; 2469 op[1] = 63 - lsb; 2470 op[2] = len; 2471} 2472 2473/* Output assembler code to perform a doubleword move insn 2474 with operands OPERANDS. */ 2475 2476const char * 2477pa_output_move_double (rtx *operands) 2478{ 2479 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; 2480 rtx latehalf[2]; 2481 rtx addreg0 = 0, addreg1 = 0; 2482 int highonly = 0; 2483 2484 /* First classify both operands. */ 2485 2486 if (REG_P (operands[0])) 2487 optype0 = REGOP; 2488 else if (offsettable_memref_p (operands[0])) 2489 optype0 = OFFSOP; 2490 else if (GET_CODE (operands[0]) == MEM) 2491 optype0 = MEMOP; 2492 else 2493 optype0 = RNDOP; 2494 2495 if (REG_P (operands[1])) 2496 optype1 = REGOP; 2497 else if (CONSTANT_P (operands[1])) 2498 optype1 = CNSTOP; 2499 else if (offsettable_memref_p (operands[1])) 2500 optype1 = OFFSOP; 2501 else if (GET_CODE (operands[1]) == MEM) 2502 optype1 = MEMOP; 2503 else 2504 optype1 = RNDOP; 2505 2506 /* Check for the cases that the operand constraints are not 2507 supposed to allow. */ 2508 gcc_assert (optype0 == REGOP || optype1 == REGOP); 2509 2510 /* Handle copies between general and floating registers. */ 2511 2512 if (optype0 == REGOP && optype1 == REGOP 2513 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])) 2514 { 2515 if (FP_REG_P (operands[0])) 2516 { 2517 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands); 2518 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands); 2519 return "{fldds|fldd} -16(%%sp),%0"; 2520 } 2521 else 2522 { 2523 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands); 2524 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands); 2525 return "{ldws|ldw} -12(%%sp),%R0"; 2526 } 2527 } 2528 2529 /* Handle auto decrementing and incrementing loads and stores 2530 specifically, since the structure of the function doesn't work 2531 for them without major modification. Do it better when we teach 2532 this port about the general inc/dec addressing of PA. 2533 (This was written by tege. Chide him if it doesn't work.) */ 2534 2535 if (optype0 == MEMOP) 2536 { 2537 /* We have to output the address syntax ourselves, since print_operand 2538 doesn't deal with the addresses we want to use. Fix this later.
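For a POST_INC store, for example, a single stw,ma below both stores the first word and steps the base register past the whole doubleword; the second word is then addressed relative to the updated base.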
*/ 2539 2540 rtx addr = XEXP (operands[0], 0); 2541 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2542 { 2543 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2544 2545 operands[0] = XEXP (addr, 0); 2546 gcc_assert (GET_CODE (operands[1]) == REG 2547 && GET_CODE (operands[0]) == REG); 2548 2549 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2550 2551 /* No overlap between high target register and address 2552 register. (We do this in a non-obvious way to 2553 save a register file writeback) */ 2554 if (GET_CODE (addr) == POST_INC) 2555 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)"; 2556 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)"; 2557 } 2558 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2559 { 2560 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); 2561 2562 operands[0] = XEXP (addr, 0); 2563 gcc_assert (GET_CODE (operands[1]) == REG 2564 && GET_CODE (operands[0]) == REG); 2565 2566 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr)); 2567 /* No overlap between high target register and address 2568 register. (We do this in a non-obvious way to save a 2569 register file writeback) */ 2570 if (GET_CODE (addr) == PRE_INC) 2571 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)"; 2572 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)"; 2573 } 2574 } 2575 if (optype1 == MEMOP) 2576 { 2577 /* We have to output the address syntax ourselves, since print_operand 2578 doesn't deal with the addresses we want to use. Fix this later. */ 2579 2580 rtx addr = XEXP (operands[1], 0); 2581 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) 2582 { 2583 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2584 2585 operands[1] = XEXP (addr, 0); 2586 gcc_assert (GET_CODE (operands[0]) == REG 2587 && GET_CODE (operands[1]) == REG); 2588 2589 if (!reg_overlap_mentioned_p (high_reg, addr)) 2590 { 2591 /* No overlap between high target register and address 2592 register. (We do this in a non-obvious way to 2593 save a register file writeback) */ 2594 if (GET_CODE (addr) == POST_INC) 2595 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0"; 2596 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0"; 2597 } 2598 else 2599 { 2600 /* This is an undefined situation. We should load into the 2601 address register *and* update that register. Probably 2602 we don't need to handle this at all. */ 2603 if (GET_CODE (addr) == POST_INC) 2604 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0"; 2605 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0"; 2606 } 2607 } 2608 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 2609 { 2610 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); 2611 2612 operands[1] = XEXP (addr, 0); 2613 gcc_assert (GET_CODE (operands[0]) == REG 2614 && GET_CODE (operands[1]) == REG); 2615 2616 if (!reg_overlap_mentioned_p (high_reg, addr)) 2617 { 2618 /* No overlap between high target register and address 2619 register. (We do this in a non-obvious way to 2620 save a register file writeback) */ 2621 if (GET_CODE (addr) == PRE_INC) 2622 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0"; 2623 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0"; 2624 } 2625 else 2626 { 2627 /* This is an undefined situation. We should load into the 2628 address register *and* update that register. Probably 2629 we don't need to handle this at all. 
*/ 2630 if (GET_CODE (addr) == PRE_INC) 2631 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0"; 2632 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0"; 2633 } 2634 } 2635 else if (GET_CODE (addr) == PLUS 2636 && GET_CODE (XEXP (addr, 0)) == MULT) 2637 { 2638 rtx xoperands[4]; 2639 2640 /* Load address into left half of destination register. */ 2641 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2642 xoperands[1] = XEXP (addr, 1); 2643 xoperands[2] = XEXP (XEXP (addr, 0), 0); 2644 xoperands[3] = XEXP (XEXP (addr, 0), 1); 2645 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}", 2646 xoperands); 2647 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2648 } 2649 else if (GET_CODE (addr) == PLUS 2650 && REG_P (XEXP (addr, 0)) 2651 && REG_P (XEXP (addr, 1))) 2652 { 2653 rtx xoperands[3]; 2654 2655 /* Load address into left half of destination register. */ 2656 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); 2657 xoperands[1] = XEXP (addr, 0); 2658 xoperands[2] = XEXP (addr, 1); 2659 output_asm_insn ("{addl|add,l} %1,%2,%0", 2660 xoperands); 2661 return "ldw 4(%0),%R0\n\tldw 0(%0),%0"; 2662 } 2663 } 2664 2665 /* If an operand is an unoffsettable memory ref, find a register 2666 we can increment temporarily to make it refer to the second word. */ 2667 2668 if (optype0 == MEMOP) 2669 addreg0 = find_addr_reg (XEXP (operands[0], 0)); 2670 2671 if (optype1 == MEMOP) 2672 addreg1 = find_addr_reg (XEXP (operands[1], 0)); 2673 2674 /* Ok, we can do one word at a time. 2675 Normally we do the low-numbered word first. 2676 2677 In either case, set up in LATEHALF the operands to use 2678 for the high-numbered word and in some cases alter the 2679 operands in OPERANDS to be suitable for the low-numbered word. */ 2680 2681 if (optype0 == REGOP) 2682 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2683 else if (optype0 == OFFSOP) 2684 latehalf[0] = adjust_address_nv (operands[0], SImode, 4); 2685 else 2686 latehalf[0] = operands[0]; 2687 2688 if (optype1 == REGOP) 2689 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); 2690 else if (optype1 == OFFSOP) 2691 latehalf[1] = adjust_address_nv (operands[1], SImode, 4); 2692 else if (optype1 == CNSTOP) 2693 { 2694 if (GET_CODE (operands[1]) == HIGH) 2695 { 2696 operands[1] = XEXP (operands[1], 0); 2697 highonly = 1; 2698 } 2699 split_double (operands[1], &operands[1], &latehalf[1]); 2700 } 2701 else 2702 latehalf[1] = operands[1]; 2703 2704 /* If the first move would clobber the source of the second one, 2705 do them in the other order. 2706 2707 This can happen in two cases: 2708 2709 mem -> register where the first half of the destination register 2710 is the same register used in the memory's address. Reload 2711 can create such insns. 2712 2713 mem in this case will be either register indirect or register 2714 indirect plus a valid offset. 2715 2716 register -> register move where REGNO(dst) == REGNO(src + 1) 2717 someone (Tim/Tege?) claimed this can happen for parameter loads. 2718 2719 Handle mem -> register case first. */ 2720 if (optype0 == REGOP 2721 && (optype1 == MEMOP || optype1 == OFFSOP) 2722 && refers_to_regno_p (REGNO (operands[0]), operands[1])) 2723 { 2724 /* Do the late half first. */ 2725 if (addreg1) 2726 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2727 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2728 2729 /* Then clobber. 
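Back the address register up again, then move the low word, which may overwrite a register appearing in the source address.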
*/ 2730 if (addreg1) 2731 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2732 return pa_singlemove_string (operands); 2733 } 2734 2735 /* Now handle register -> register case. */ 2736 if (optype0 == REGOP && optype1 == REGOP 2737 && REGNO (operands[0]) == REGNO (operands[1]) + 1) 2738 { 2739 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2740 return pa_singlemove_string (operands); 2741 } 2742 2743 /* Normal case: do the two words, low-numbered first. */ 2744 2745 output_asm_insn (pa_singlemove_string (operands), operands); 2746 2747 /* Make any unoffsettable addresses point at high-numbered word. */ 2748 if (addreg0) 2749 output_asm_insn ("ldo 4(%0),%0", &addreg0); 2750 if (addreg1) 2751 output_asm_insn ("ldo 4(%0),%0", &addreg1); 2752 2753 /* Do high-numbered word. */ 2754 if (highonly) 2755 output_asm_insn ("ldil L'%1,%0", latehalf); 2756 else 2757 output_asm_insn (pa_singlemove_string (latehalf), latehalf); 2758 2759 /* Undo the adds we just did. */ 2760 if (addreg0) 2761 output_asm_insn ("ldo -4(%0),%0", &addreg0); 2762 if (addreg1) 2763 output_asm_insn ("ldo -4(%0),%0", &addreg1); 2764 2765 return ""; 2766} 2767 2768const char * 2769pa_output_fp_move_double (rtx *operands) 2770{ 2771 if (FP_REG_P (operands[0])) 2772 { 2773 if (FP_REG_P (operands[1]) 2774 || operands[1] == CONST0_RTX (GET_MODE (operands[0]))) 2775 output_asm_insn ("fcpy,dbl %f1,%0", operands); 2776 else 2777 output_asm_insn ("fldd%F1 %1,%0", operands); 2778 } 2779 else if (FP_REG_P (operands[1])) 2780 { 2781 output_asm_insn ("fstd%F0 %1,%0", operands); 2782 } 2783 else 2784 { 2785 rtx xoperands[2]; 2786 2787 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0]))); 2788 2789 /* This is a pain. You have to be prepared to deal with an 2790 arbitrary address here including pre/post increment/decrement. 2791 2792 So avoid this in the MD. */ 2793 gcc_assert (GET_CODE (operands[0]) == REG); 2794 2795 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); 2796 xoperands[0] = operands[0]; 2797 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands); 2798 } 2799 return ""; 2800} 2801 2802/* Return a REG that occurs in ADDR with coefficient 1. 2803 ADDR can be effectively incremented by incrementing REG. */ 2804 2805static rtx 2806find_addr_reg (rtx addr) 2807{ 2808 while (GET_CODE (addr) == PLUS) 2809 { 2810 if (GET_CODE (XEXP (addr, 0)) == REG) 2811 addr = XEXP (addr, 0); 2812 else if (GET_CODE (XEXP (addr, 1)) == REG) 2813 addr = XEXP (addr, 1); 2814 else if (CONSTANT_P (XEXP (addr, 0))) 2815 addr = XEXP (addr, 1); 2816 else if (CONSTANT_P (XEXP (addr, 1))) 2817 addr = XEXP (addr, 0); 2818 else 2819 gcc_unreachable (); 2820 } 2821 gcc_assert (GET_CODE (addr) == REG); 2822 return addr; 2823} 2824 2825/* Emit code to perform a block move. 2826 2827 OPERANDS[0] is the destination pointer as a REG, clobbered. 2828 OPERANDS[1] is the source pointer as a REG, clobbered. 2829 OPERANDS[2] is a register for temporary storage. 2830 OPERANDS[3] is a register for temporary storage. 2831 OPERANDS[4] is the size as a CONST_INT. 2832 OPERANDS[5] is the alignment safe to use, as a CONST_INT. 2833 OPERANDS[6] is another temporary register. */ 2834 2835const char * 2836pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 2837{ 2838 int align = INTVAL (operands[5]); 2839 unsigned long n_bytes = INTVAL (operands[4]); 2840 2841 /* We can't move more than a word at a time because the PA 2842 has no longer integer move insns. (Could use fp mem ops?) */ 2843 if (align > (TARGET_64BIT ?
8 : 4)) 2844 align = (TARGET_64BIT ? 8 : 4); 2845 2846 /* Note that we know each loop below will execute at least twice 2847 (else we would have open-coded the copy). */ 2848 switch (align) 2849 { 2850 case 8: 2851 /* Pre-adjust the loop counter. */ 2852 operands[4] = GEN_INT (n_bytes - 16); 2853 output_asm_insn ("ldi %4,%2", operands); 2854 2855 /* Copying loop. */ 2856 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2857 output_asm_insn ("ldd,ma 8(%1),%6", operands); 2858 output_asm_insn ("std,ma %3,8(%0)", operands); 2859 output_asm_insn ("addib,>= -16,%2,.-12", operands); 2860 output_asm_insn ("std,ma %6,8(%0)", operands); 2861 2862 /* Handle the residual. There could be up to 7 bytes of 2863 residual to copy! */ 2864 if (n_bytes % 16 != 0) 2865 { 2866 operands[4] = GEN_INT (n_bytes % 8); 2867 if (n_bytes % 16 >= 8) 2868 output_asm_insn ("ldd,ma 8(%1),%3", operands); 2869 if (n_bytes % 8 != 0) 2870 output_asm_insn ("ldd 0(%1),%6", operands); 2871 if (n_bytes % 16 >= 8) 2872 output_asm_insn ("std,ma %3,8(%0)", operands); 2873 if (n_bytes % 8 != 0) 2874 output_asm_insn ("stdby,e %6,%4(%0)", operands); 2875 } 2876 return ""; 2877 2878 case 4: 2879 /* Pre-adjust the loop counter. */ 2880 operands[4] = GEN_INT (n_bytes - 8); 2881 output_asm_insn ("ldi %4,%2", operands); 2882 2883 /* Copying loop. */ 2884 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2885 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands); 2886 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2887 output_asm_insn ("addib,>= -8,%2,.-12", operands); 2888 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands); 2889 2890 /* Handle the residual. There could be up to 7 bytes of 2891 residual to copy! */ 2892 if (n_bytes % 8 != 0) 2893 { 2894 operands[4] = GEN_INT (n_bytes % 4); 2895 if (n_bytes % 8 >= 4) 2896 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands); 2897 if (n_bytes % 4 != 0) 2898 output_asm_insn ("ldw 0(%1),%6", operands); 2899 if (n_bytes % 8 >= 4) 2900 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands); 2901 if (n_bytes % 4 != 0) 2902 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands); 2903 } 2904 return ""; 2905 2906 case 2: 2907 /* Pre-adjust the loop counter. */ 2908 operands[4] = GEN_INT (n_bytes - 4); 2909 output_asm_insn ("ldi %4,%2", operands); 2910 2911 /* Copying loop. */ 2912 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2913 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands); 2914 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2915 output_asm_insn ("addib,>= -4,%2,.-12", operands); 2916 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands); 2917 2918 /* Handle the residual. */ 2919 if (n_bytes % 4 != 0) 2920 { 2921 if (n_bytes % 4 >= 2) 2922 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands); 2923 if (n_bytes % 2 != 0) 2924 output_asm_insn ("ldb 0(%1),%6", operands); 2925 if (n_bytes % 4 >= 2) 2926 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands); 2927 if (n_bytes % 2 != 0) 2928 output_asm_insn ("stb %6,0(%0)", operands); 2929 } 2930 return ""; 2931 2932 case 1: 2933 /* Pre-adjust the loop counter. */ 2934 operands[4] = GEN_INT (n_bytes - 2); 2935 output_asm_insn ("ldi %4,%2", operands); 2936 2937 /* Copying loop. */ 2938 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands); 2939 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands); 2940 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands); 2941 output_asm_insn ("addib,>= -2,%2,.-12", operands); 2942 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands); 2943 2944 /* Handle the residual. 
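Since the loop copies two bytes per iteration, at most a single byte remains here.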
*/ 2945 if (n_bytes % 2 != 0) 2946 { 2947 output_asm_insn ("ldb 0(%1),%3", operands); 2948 output_asm_insn ("stb %3,0(%0)", operands); 2949 } 2950 return ""; 2951 2952 default: 2953 gcc_unreachable (); 2954 } 2955} 2956 2957/* Count the number of insns necessary to handle this block move. 2958 2959 Basic structure is the same as pa_output_block_move, except that we 2960 count insns rather than emit them. */ 2961 2962static int 2963compute_movmem_length (rtx_insn *insn) 2964{ 2965 rtx pat = PATTERN (insn); 2966 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); 2967 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); 2968 unsigned int n_insns = 0; 2969 2970 /* We can't move more than a word at a time because the PA 2971 has no longer integer move insns. (Could use fp mem ops?) */ 2972 if (align > (TARGET_64BIT ? 8 : 4)) 2973 align = (TARGET_64BIT ? 8 : 4); 2974 2975 /* The basic copying loop. */ 2976 n_insns = 6; 2977 2978 /* Residuals. */ 2979 if (n_bytes % (2 * align) != 0) 2980 { 2981 if ((n_bytes % (2 * align)) >= align) 2982 n_insns += 2; 2983 2984 if ((n_bytes % align) != 0) 2985 n_insns += 2; 2986 } 2987 2988 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 2989 return n_insns * 4; 2990} 2991 2992/* Emit code to perform a block clear. 2993 2994 OPERANDS[0] is the destination pointer as a REG, clobbered. 2995 OPERANDS[1] is a register for temporary storage. 2996 OPERANDS[2] is the size as a CONST_INT. 2997 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */ 2998 2999const char * 3000pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) 3001{ 3002 int align = INTVAL (operands[3]); 3003 unsigned long n_bytes = INTVAL (operands[2]); 3004 3005 /* We can't clear more than a word at a time because the PA 3006 has no longer integer move insns. */ 3007 if (align > (TARGET_64BIT ? 8 : 4)) 3008 align = (TARGET_64BIT ? 8 : 4); 3009 3010 /* Note that we know each loop below will execute at least twice 3011 (else we would have open-coded the clear). */ 3012 switch (align) 3013 { 3014 case 8: 3015 /* Pre-adjust the loop counter. */ 3016 operands[2] = GEN_INT (n_bytes - 16); 3017 output_asm_insn ("ldi %2,%1", operands); 3018 3019 /* Loop. */ 3020 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3021 output_asm_insn ("addib,>= -16,%1,.-4", operands); 3022 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3023 3024 /* Handle the residual. There could be up to 7 bytes of 3025 residual to clear! */ 3026 if (n_bytes % 16 != 0) 3027 { 3028 operands[2] = GEN_INT (n_bytes % 8); 3029 if (n_bytes % 16 >= 8) 3030 output_asm_insn ("std,ma %%r0,8(%0)", operands); 3031 if (n_bytes % 8 != 0) 3032 output_asm_insn ("stdby,e %%r0,%2(%0)", operands); 3033 } 3034 return ""; 3035 3036 case 4: 3037 /* Pre-adjust the loop counter. */ 3038 operands[2] = GEN_INT (n_bytes - 8); 3039 output_asm_insn ("ldi %2,%1", operands); 3040 3041 /* Loop. */ 3042 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3043 output_asm_insn ("addib,>= -8,%1,.-4", operands); 3044 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3045 3046 /* Handle the residual. There could be up to 7 bytes of 3047 residual to clear! */ 3048 if (n_bytes % 8 != 0) 3049 { 3050 operands[2] = GEN_INT (n_bytes % 4); 3051 if (n_bytes % 8 >= 4) 3052 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands); 3053 if (n_bytes % 4 != 0) 3054 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands); 3055 } 3056 return ""; 3057 3058 case 2: 3059 /* Pre-adjust the loop counter.
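The counter starts at N_BYTES less one full iteration (four bytes at this alignment) so the addib below falls through after the final pair of stores.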
*/ 3060 operands[2] = GEN_INT (n_bytes - 4); 3061 output_asm_insn ("ldi %2,%1", operands); 3062 3063 /* Loop. */ 3064 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3065 output_asm_insn ("addib,>= -4,%1,.-4", operands); 3066 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3067 3068 /* Handle the residual. */ 3069 if (n_bytes % 4 != 0) 3070 { 3071 if (n_bytes % 4 >= 2) 3072 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands); 3073 if (n_bytes % 2 != 0) 3074 output_asm_insn ("stb %%r0,0(%0)", operands); 3075 } 3076 return ""; 3077 3078 case 1: 3079 /* Pre-adjust the loop counter. */ 3080 operands[2] = GEN_INT (n_bytes - 2); 3081 output_asm_insn ("ldi %2,%1", operands); 3082 3083 /* Loop. */ 3084 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3085 output_asm_insn ("addib,>= -2,%1,.-4", operands); 3086 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands); 3087 3088 /* Handle the residual. */ 3089 if (n_bytes % 2 != 0) 3090 output_asm_insn ("stb %%r0,0(%0)", operands); 3091 3092 return ""; 3093 3094 default: 3095 gcc_unreachable (); 3096 } 3097} 3098 3099/* Count the number of insns necessary to handle this block clear. 3100 3101 Basic structure is the same as pa_output_block_clear, except that we 3102 count insns rather than emit them. */ 3103 3104static int 3105compute_clrmem_length (rtx_insn *insn) 3106{ 3107 rtx pat = PATTERN (insn); 3108 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0)); 3109 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0)); 3110 unsigned int n_insns = 0; 3111 3112 /* We can't clear more than a word at a time because the PA 3113 has no longer integer move insns. */ 3114 if (align > (TARGET_64BIT ? 8 : 4)) 3115 align = (TARGET_64BIT ? 8 : 4); 3116 3117 /* The basic loop. */ 3118 n_insns = 4; 3119 3120 /* Residuals. */ 3121 if (n_bytes % (2 * align) != 0) 3122 { 3123 if ((n_bytes % (2 * align)) >= align) 3124 n_insns++; 3125 3126 if ((n_bytes % align) != 0) 3127 n_insns++; 3128 } 3129 3130 /* Lengths are expressed in bytes now; each insn is 4 bytes. */ 3131 return n_insns * 4; 3132} 3133 3134 3135const char * 3136pa_output_and (rtx *operands) 3137{ 3138 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3139 { 3140 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3141 int ls0, ls1, ms0, p, len; 3142 3143 for (ls0 = 0; ls0 < 32; ls0++) 3144 if ((mask & (1 << ls0)) == 0) 3145 break; 3146 3147 for (ls1 = ls0; ls1 < 32; ls1++) 3148 if ((mask & (1 << ls1)) != 0) 3149 break; 3150 3151 for (ms0 = ls1; ms0 < 32; ms0++) 3152 if ((mask & (1 << ms0)) == 0) 3153 break; 3154 3155 gcc_assert (ms0 == 32); 3156 3157 if (ls1 == 32) 3158 { 3159 len = ls0; 3160 3161 gcc_assert (len); 3162 3163 operands[2] = GEN_INT (len); 3164 return "{extru|extrw,u} %1,31,%2,%0"; 3165 } 3166 else 3167 { 3168 /* We could use this `depi' for the case above as well, but `depi' 3169 requires one more register file access than an `extru'. */ 3170 3171 p = 31 - ls0; 3172 len = ls1 - ls0; 3173 3174 operands[2] = GEN_INT (p); 3175 operands[3] = GEN_INT (len); 3176 return "{depi|depwi} 0,%2,%3,%0"; 3177 } 3178 } 3179 else 3180 return "and %1,%2,%0"; 3181} 3182 3183/* Return a string to perform a bitwise-and of operands[1] with operands[2] 3184 storing the result in operands[0].
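A mask of low-order ones, 0xff for example, is done with a single extract (extrd,u %1,63,8,%0); a mask with one contiguous run of interior zeros is instead done by depositing zeros over that field.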
*/ 3185const char * 3186pa_output_64bit_and (rtx *operands) 3187{ 3188 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) 3189 { 3190 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3191 int ls0, ls1, ms0, p, len; 3192 3193 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++) 3194 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) 3195 break; 3196 3197 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++) 3198 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) 3199 break; 3200 3201 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++) 3202 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) 3203 break; 3204 3205 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT); 3206 3207 if (ls1 == HOST_BITS_PER_WIDE_INT) 3208 { 3209 len = ls0; 3210 3211 gcc_assert (len); 3212 3213 operands[2] = GEN_INT (len); 3214 return "extrd,u %1,63,%2,%0"; 3215 } 3216 else 3217 { 3218 /* We could use this `depdi' for the case above as well, but `depdi' 3219 requires one more register file access than an `extrd,u'. */ 3220 3221 p = 63 - ls0; 3222 len = ls1 - ls0; 3223 3224 operands[2] = GEN_INT (p); 3225 operands[3] = GEN_INT (len); 3226 return "depdi 0,%2,%3,%0"; 3227 } 3228 } 3229 else 3230 return "and %1,%2,%0"; 3231} 3232 3233const char * 3234pa_output_ior (rtx *operands) 3235{ 3236 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3237 int bs0, bs1, p, len; 3238 3239 if (INTVAL (operands[2]) == 0) 3240 return "copy %1,%0"; 3241 3242 for (bs0 = 0; bs0 < 32; bs0++) 3243 if ((mask & (1 << bs0)) != 0) 3244 break; 3245 3246 for (bs1 = bs0; bs1 < 32; bs1++) 3247 if ((mask & (1 << bs1)) == 0) 3248 break; 3249 3250 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3251 3252 p = 31 - bs0; 3253 len = bs1 - bs0; 3254 3255 operands[2] = GEN_INT (p); 3256 operands[3] = GEN_INT (len); 3257 return "{depi|depwi} -1,%2,%3,%0"; 3258} 3259 3260/* Return a string to perform a bitwise-or of operands[1] with operands[2] 3261 storing the result in operands[0]. */ 3262const char * 3263pa_output_64bit_ior (rtx *operands) 3264{ 3265 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); 3266 int bs0, bs1, p, len; 3267 3268 if (INTVAL (operands[2]) == 0) 3269 return "copy %1,%0"; 3270 3271 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++) 3272 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) 3273 break; 3274 3275 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++) 3276 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) 3277 break; 3278 3279 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT 3280 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask); 3281 3282 p = 63 - bs0; 3283 len = bs1 - bs0; 3284 3285 operands[2] = GEN_INT (p); 3286 operands[3] = GEN_INT (len); 3287 return "depdi -1,%2,%3,%0"; 3288} 3289 3290/* Target hook for assembling integer objects. This code handles 3291 aligned SI and DI integers specially since function references 3292 must be preceded by P%. */ 3293 3294static bool 3295pa_assemble_integer (rtx x, unsigned int size, int aligned_p) 3296{ 3297 if (size == UNITS_PER_WORD 3298 && aligned_p 3299 && function_label_operand (x, VOIDmode)) 3300 { 3301 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); 3302 3303 /* We don't want an OPD when generating fast indirect calls. */ 3304 if (!TARGET_FAST_INDIRECT_CALLS) 3305 fputs ("P%", asm_out_file); 3306 3307 output_addr_const (asm_out_file, x); 3308 fputc ('\n', asm_out_file); 3309 return true; 3310 } 3311 return default_assemble_integer (x, size, aligned_p); 3312} 3313 3314/* Output an ascii string.
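Nonprinting characters are emitted as \xNN escapes, and the output is split into multiple .STRING directives so that no assembler input line becomes too long.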
*/ 3315void 3316pa_output_ascii (FILE *file, const char *p, int size) 3317{ 3318 int i; 3319 int chars_output; 3320 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */ 3321 3322 /* The HP assembler can only take strings of 256 characters at one 3323 time. This is a limitation on input line length, *not* the 3324 length of the string. Sigh. Even worse, it seems that the 3325 restriction is in number of input characters (see \xnn & 3326 \whatever). So we have to do this very carefully. */ 3327 3328 fputs ("\t.STRING \"", file); 3329 3330 chars_output = 0; 3331 for (i = 0; i < size; i += 4) 3332 { 3333 int co = 0; 3334 int io = 0; 3335 for (io = 0, co = 0; io < MIN (4, size - i); io++) 3336 { 3337 register unsigned int c = (unsigned char) p[i + io]; 3338 3339 if (c == '\"' || c == '\\') 3340 partial_output[co++] = '\\'; 3341 if (c >= ' ' && c < 0177) 3342 partial_output[co++] = c; 3343 else 3344 { 3345 unsigned int hexd; 3346 partial_output[co++] = '\\'; 3347 partial_output[co++] = 'x'; 3348 hexd = c / 16 - 0 + '0'; 3349 if (hexd > '9') 3350 hexd -= '9' - 'a' + 1; 3351 partial_output[co++] = hexd; 3352 hexd = c % 16 - 0 + '0'; 3353 if (hexd > '9') 3354 hexd -= '9' - 'a' + 1; 3355 partial_output[co++] = hexd; 3356 } 3357 } 3358 if (chars_output + co > 243) 3359 { 3360 fputs ("\"\n\t.STRING \"", file); 3361 chars_output = 0; 3362 } 3363 fwrite (partial_output, 1, (size_t) co, file); 3364 chars_output += co; 3365 co = 0; 3366 } 3367 fputs ("\"\n", file); 3368} 3369 3370/* Try to rewrite floating point comparisons & branches to avoid 3371 useless add,tr insns. 3372 3373 CHECK_NOTES is nonzero if we should examine REG_DEAD notes 3374 to see if FPCC is dead. CHECK_NOTES is nonzero for the 3375 first attempt to remove useless add,tr insns. It is zero 3376 for the second pass as reorg sometimes leaves bogus REG_DEAD 3377 notes lying around. 3378 3379 When CHECK_NOTES is zero we can only eliminate add,tr insns 3380 when there's a 1:1 correspondence between fcmp and ftest/fbranch 3381 instructions. */ 3382static void 3383remove_useless_addtr_insns (int check_notes) 3384{ 3385 rtx_insn *insn; 3386 static int pass = 0; 3387 3388 /* This is fairly cheap, so always run it when optimizing. */ 3389 if (optimize > 0) 3390 { 3391 int fcmp_count = 0; 3392 int fbranch_count = 0; 3393 3394 /* Walk all the insns in this function looking for fcmp & fbranch 3395 instructions. Keep track of how many of each we find. */ 3396 for (insn = get_insns (); insn; insn = next_insn (insn)) 3397 { 3398 rtx tmp; 3399 3400 /* Ignore anything that isn't an INSN or a JUMP_INSN. */ 3401 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn)) 3402 continue; 3403 3404 tmp = PATTERN (insn); 3405 3406 /* It must be a set. */ 3407 if (GET_CODE (tmp) != SET) 3408 continue; 3409 3410 /* If the destination is CCFP, then we've found an fcmp insn. */ 3411 tmp = SET_DEST (tmp); 3412 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) 3413 { 3414 fcmp_count++; 3415 continue; 3416 } 3417 3418 tmp = PATTERN (insn); 3419 /* If this is an fbranch instruction, bump the fbranch counter. */ 3420 if (GET_CODE (tmp) == SET 3421 && SET_DEST (tmp) == pc_rtx 3422 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE 3423 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE 3424 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG 3425 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) 3426 { 3427 fbranch_count++; 3428 continue; 3429 } 3430 } 3431 3432 3433 /* Find all floating point compare + branch insns. 
If possible, 3434 reverse the comparison & the branch to avoid add,tr insns. */ 3435 for (insn = get_insns (); insn; insn = next_insn (insn)) 3436 { 3437 rtx tmp; 3438 rtx_insn *next; 3439 3440 /* Ignore anything that isn't an INSN. */ 3441 if (! NONJUMP_INSN_P (insn)) 3442 continue; 3443 3444 tmp = PATTERN (insn); 3445 3446 /* It must be a set. */ 3447 if (GET_CODE (tmp) != SET) 3448 continue; 3449 3450 /* The destination must be CCFP, which is register zero. */ 3451 tmp = SET_DEST (tmp); 3452 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) 3453 continue; 3454 3455 /* INSN should be a set of CCFP. 3456 3457 See if the result of this insn is used in a reversed FP 3458 conditional branch. If so, reverse our condition and 3459 the branch. Doing so avoids useless add,tr insns. */ 3460 next = next_insn (insn); 3461 while (next) 3462 { 3463 /* Jumps, calls and labels stop our search. */ 3464 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next)) 3465 break; 3466 3467 /* As does another fcmp insn. */ 3468 if (NONJUMP_INSN_P (next) 3469 && GET_CODE (PATTERN (next)) == SET 3470 && GET_CODE (SET_DEST (PATTERN (next))) == REG 3471 && REGNO (SET_DEST (PATTERN (next))) == 0) 3472 break; 3473 3474 next = next_insn (next); 3475 } 3476 3477 /* Is NEXT_INSN a branch? */ 3478 if (next && JUMP_P (next)) 3479 { 3480 rtx pattern = PATTERN (next); 3481 3482 /* If it is a reversed fp conditional branch (e.g. uses add,tr) 3483 and CCFP dies, then reverse our conditional and the branch 3484 to avoid the add,tr. */ 3485 if (GET_CODE (pattern) == SET 3486 && SET_DEST (pattern) == pc_rtx 3487 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE 3488 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE 3489 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG 3490 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 3491 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC 3492 && (fcmp_count == fbranch_count 3493 || (check_notes 3494 && find_regno_note (next, REG_DEAD, 0)))) 3495 { 3496 /* Reverse the branch. */ 3497 tmp = XEXP (SET_SRC (pattern), 1); 3498 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); 3499 XEXP (SET_SRC (pattern), 2) = tmp; 3500 INSN_CODE (next) = -1; 3501 3502 /* Reverse our condition. */ 3503 tmp = PATTERN (insn); 3504 PUT_CODE (XEXP (tmp, 1), 3505 (reverse_condition_maybe_unordered 3506 (GET_CODE (XEXP (tmp, 1))))); 3507 } 3508 } 3509 } 3510 } 3511 3512 pass = !pass; 3513 3514} 3515 3516/* You may have trouble believing this, but this is the 32 bit HP-PA 3517 stack layout. Wow. 3518 3519 Offset Contents 3520 3521 Variable arguments (optional; any number may be allocated) 3522 3523 SP-(4*(N+9)) arg word N 3524 : : 3525 SP-56 arg word 5 3526 SP-52 arg word 4 3527 3528 Fixed arguments (must be allocated; may remain unused) 3529 3530 SP-48 arg word 3 3531 SP-44 arg word 2 3532 SP-40 arg word 1 3533 SP-36 arg word 0 3534 3535 Frame Marker 3536 3537 SP-32 External Data Pointer (DP) 3538 SP-28 External sr4 3539 SP-24 External/stub RP (RP') 3540 SP-20 Current RP 3541 SP-16 Static Link 3542 SP-12 Clean up 3543 SP-8 Calling Stub RP (RP'') 3544 SP-4 Previous SP 3545 3546 Top of Frame 3547 3548 SP-0 Stack Pointer (points to next available address) 3549 3550*/ 3551 3552/* This function saves registers as follows. Registers marked with ' are 3553 this function's registers (as opposed to the previous function's). 3554 If a frame_pointer isn't needed, r3 is saved as a general register; 3555 the space for the frame pointer is still allocated, though, to keep 3556 things simple.
3557 3558 3559 Top of Frame 3560 3561 SP (FP') Previous FP 3562 SP + 4 Alignment filler (sigh) 3563 SP + 8 Space for locals reserved here. 3564 . 3565 . 3566 . 3567 SP + n All call saved registers used. 3568 . 3569 . 3570 . 3571 SP + o All call saved fp registers used. 3572 . 3573 . 3574 . 3575 SP + p (SP') points to next available address. 3576 3577*/ 3578 3579/* Global variables set by pa_expand_prologue(). */ 3580/* Size of frame. Need to know this to emit return insns from 3581 leaf procedures. */ 3582static HOST_WIDE_INT actual_fsize, local_fsize; 3583static int save_fregs; 3584 3585/* Emit RTL to store REG at the memory location specified by BASE+DISP. 3586 Handle case where DISP > 8k by using the add_high_const patterns. 3587 3588 Note in DISP > 8k case, we will leave the high part of the address 3589 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this. */ 3590 3591static void 3592store_reg (int reg, HOST_WIDE_INT disp, int base) 3593{ 3594 rtx dest, src, basereg; 3595 rtx_insn *insn; 3596 3597 src = gen_rtx_REG (word_mode, reg); 3598 basereg = gen_rtx_REG (Pmode, base); 3599 if (VAL_14_BITS_P (disp)) 3600 { 3601 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 3602 insn = emit_move_insn (dest, src); 3603 } 3604 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3605 { 3606 rtx delta = GEN_INT (disp); 3607 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3608 3609 emit_move_insn (tmpreg, delta); 3610 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3611 if (DO_FRAME_NOTES) 3612 { 3613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3614 gen_rtx_SET (VOIDmode, tmpreg, 3615 gen_rtx_PLUS (Pmode, basereg, delta))); 3616 RTX_FRAME_RELATED_P (insn) = 1; 3617 } 3618 dest = gen_rtx_MEM (word_mode, tmpreg); 3619 insn = emit_move_insn (dest, src); 3620 } 3621 else 3622 { 3623 rtx delta = GEN_INT (disp); 3624 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 3625 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3626 3627 emit_move_insn (tmpreg, high); 3628 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3629 insn = emit_move_insn (dest, src); 3630 if (DO_FRAME_NOTES) 3631 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3632 gen_rtx_SET (VOIDmode, 3633 gen_rtx_MEM (word_mode, 3634 gen_rtx_PLUS (word_mode, 3635 basereg, 3636 delta)), 3637 src)); 3638 } 3639 3640 if (DO_FRAME_NOTES) 3641 RTX_FRAME_RELATED_P (insn) = 1; 3642} 3643 3644/* Emit RTL to store REG at the memory location specified by BASE and then 3645 add MOD to BASE. MOD must be <= 8k. */ 3646 3647static void 3648store_reg_modify (int base, int reg, HOST_WIDE_INT mod) 3649{ 3650 rtx basereg, srcreg, delta; 3651 rtx_insn *insn; 3652 3653 gcc_assert (VAL_14_BITS_P (mod)); 3654 3655 basereg = gen_rtx_REG (Pmode, base); 3656 srcreg = gen_rtx_REG (word_mode, reg); 3657 delta = GEN_INT (mod); 3658 3659 insn = emit_insn (gen_post_store (basereg, srcreg, delta)); 3660 if (DO_FRAME_NOTES) 3661 { 3662 RTX_FRAME_RELATED_P (insn) = 1; 3663 3664 /* RTX_FRAME_RELATED_P must be set on each frame related set 3665 in a parallel with more than one element. */ 3666 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1; 3667 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 3668 } 3669} 3670 3671/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case 3672 where DISP > 8k by using the add_high_const patterns. NOTE indicates 3673 whether to add a frame note or not. 3674 3675 In the DISP > 8k case, we leave the high part of the address in %r1.
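(With a large DISP this typically assembles to an addil of the left part of DISP against BASE, whose result lands in %r1, followed by an ldo using the right part.)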
3676 There is code in expand_hppa_{prologue,epilogue} that knows about this. */ 3677 3678static void 3679set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note) 3680{ 3681 rtx_insn *insn; 3682 3683 if (VAL_14_BITS_P (disp)) 3684 { 3685 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3686 plus_constant (Pmode, 3687 gen_rtx_REG (Pmode, base), disp)); 3688 } 3689 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 3690 { 3691 rtx basereg = gen_rtx_REG (Pmode, base); 3692 rtx delta = GEN_INT (disp); 3693 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3694 3695 emit_move_insn (tmpreg, delta); 3696 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3697 gen_rtx_PLUS (Pmode, tmpreg, basereg)); 3698 if (DO_FRAME_NOTES) 3699 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3700 gen_rtx_SET (VOIDmode, tmpreg, 3701 gen_rtx_PLUS (Pmode, basereg, delta))); 3702 } 3703 else 3704 { 3705 rtx basereg = gen_rtx_REG (Pmode, base); 3706 rtx delta = GEN_INT (disp); 3707 rtx tmpreg = gen_rtx_REG (Pmode, 1); 3708 3709 emit_move_insn (tmpreg, 3710 gen_rtx_PLUS (Pmode, basereg, 3711 gen_rtx_HIGH (Pmode, delta))); 3712 insn = emit_move_insn (gen_rtx_REG (Pmode, reg), 3713 gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 3714 } 3715 3716 if (DO_FRAME_NOTES && note) 3717 RTX_FRAME_RELATED_P (insn) = 1; 3718} 3719 3720HOST_WIDE_INT 3721pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live) 3722{ 3723 int freg_saved = 0; 3724 int i, j; 3725 3726 /* The code in pa_expand_prologue and pa_expand_epilogue must 3727 be consistent with the rounding and size calculation done here. 3728 Change them at the same time. */ 3729 3730 /* We do our own stack alignment. First, round the size of the 3731 stack locals up to a word boundary. */ 3732 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3733 3734 /* Space for previous frame pointer + filler. If any frame is 3735 allocated, we need to add in the STARTING_FRAME_OFFSET. We 3736 waste some space here for the sake of HP compatibility. The 3737 first slot is only used when the frame pointer is needed. */ 3738 if (size || frame_pointer_needed) 3739 size += STARTING_FRAME_OFFSET; 3740 3741 /* If the current function calls __builtin_eh_return, then we need 3742 to allocate stack space for registers that will hold data for 3743 the exception handler. */ 3744 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3745 { 3746 unsigned int i; 3747 3748 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) 3749 continue; 3750 size += i * UNITS_PER_WORD; 3751 } 3752 3753 /* Account for space used by the callee general register saves. */ 3754 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--) 3755 if (df_regs_ever_live_p (i)) 3756 size += UNITS_PER_WORD; 3757 3758 /* Account for space used by the callee floating point register saves. */ 3759 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 3760 if (df_regs_ever_live_p (i) 3761 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1))) 3762 { 3763 freg_saved = 1; 3764 3765 /* We always save both halves of the FP register, so always 3766 increment the frame size by 8 bytes. */ 3767 size += 8; 3768 } 3769 3770 /* If any of the floating registers are saved, account for the 3771 alignment needed for the floating point register save block. */ 3772 if (freg_saved) 3773 { 3774 size = (size + 7) & ~7; 3775 if (fregs_live) 3776 *fregs_live = 1; 3777 } 3778 3779 /* The various ABIs include space for the outgoing parameters in the 3780 size of the current function's stack frame. 
We don't need to align 3781 for the outgoing arguments as their alignment is set by the final 3782 rounding for the frame as a whole. */ 3783 size += crtl->outgoing_args_size; 3784 3785 /* Allocate space for the fixed frame marker. This space must be 3786 allocated for any function that makes calls or allocates 3787 stack space. */ 3788 if (!crtl->is_leaf || size) 3789 size += TARGET_64BIT ? 48 : 32; 3790 3791 /* Finally, round to the preferred stack boundary. */ 3792 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1) 3793 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)); 3794} 3795 3796/* Generate the assembly code for function entry. FILE is a stdio 3797 stream to output the code to. SIZE is an int: how many units of 3798 temporary storage to allocate. 3799 3800 Refer to the array `regs_ever_live' to determine which registers to 3801 save; `regs_ever_live[I]' is nonzero if register number I is ever 3802 used in the function. This function is responsible for knowing 3803 which registers should not be saved even if used. */ 3804 3805/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block 3806 of memory. If any fpu reg is used in the function, we allocate 3807 such a block here, at the bottom of the frame, just in case it's needed. 3808 3809 If this function is a leaf procedure, then we may choose not 3810 to do a "save" insn. The decision about whether or not 3811 to do this is made in regclass.c. */ 3812 3813static void 3814pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 3815{ 3816 /* The function's label and associated .PROC must never be 3817 separated and must be output *after* any profiling declarations 3818 to avoid changing spaces/subspaces within a procedure. */ 3819 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); 3820 fputs ("\t.PROC\n", file); 3821 3822 /* pa_expand_prologue does the dirty work now. We just need 3823 to output the assembler directives which denote the start 3824 of a function. */ 3825 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize); 3826 if (crtl->is_leaf) 3827 fputs (",NO_CALLS", file); 3828 else 3829 fputs (",CALLS", file); 3830 if (rp_saved) 3831 fputs (",SAVE_RP", file); 3832 3833 /* The SAVE_SP flag is used to indicate that register %r3 is stored 3834 at the beginning of the frame and that it is used as the frame 3835 pointer for the frame. We do this because our current frame 3836 layout doesn't conform to that specified in the HP runtime 3837 documentation and we need a way to indicate to programs such as 3838 GDB where %r3 is saved. The SAVE_SP flag was chosen because it 3839 isn't used by HP compilers but is supported by the assembler. 3840 However, SAVE_SP is supposed to indicate that the previous stack 3841 pointer has been saved in the frame marker. */ 3842 if (frame_pointer_needed) 3843 fputs (",SAVE_SP", file); 3844 3845 /* Pass on information about the number of callee register saves 3846 performed in the prologue. 3847 3848 The compiler is supposed to pass the highest register number 3849 saved, the assembler then has to adjust that number before 3850 entering it into the unwind descriptor (to account for any 3851 caller saved registers with lower register numbers than the 3852 first callee saved register). 
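For example, when the prologue saves %r3, %r4 and %r5, gr_saved is 3 and ENTRY_GR=5 is emitted below.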
*/ 3853 if (gr_saved) 3854 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); 3855 3856 if (fr_saved) 3857 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); 3858 3859 fputs ("\n\t.ENTRY\n", file); 3860 3861 remove_useless_addtr_insns (0); 3862} 3863 3864void 3865pa_expand_prologue (void) 3866{ 3867 int merge_sp_adjust_with_store = 0; 3868 HOST_WIDE_INT size = get_frame_size (); 3869 HOST_WIDE_INT offset; 3870 int i; 3871 rtx tmpreg; 3872 rtx_insn *insn; 3873 3874 gr_saved = 0; 3875 fr_saved = 0; 3876 save_fregs = 0; 3877 3878 /* Compute total size for frame pointer, filler, locals and rounding to 3879 the next word boundary. Similar code appears in pa_compute_frame_size 3880 and must be changed in tandem with this code. */ 3881 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1); 3882 if (local_fsize || frame_pointer_needed) 3883 local_fsize += STARTING_FRAME_OFFSET; 3884 3885 actual_fsize = pa_compute_frame_size (size, &save_fregs); 3886 if (flag_stack_usage_info) 3887 current_function_static_stack_size = actual_fsize; 3888 3889 /* Compute a few things we will use often. */ 3890 tmpreg = gen_rtx_REG (word_mode, 1); 3891 3892 /* Save RP first. The calling conventions manual states RP will 3893 always be stored into the caller's frame at sp - 20 or sp - 16 3894 depending on which ABI is in use. */ 3895 if (df_regs_ever_live_p (2) || crtl->calls_eh_return) 3896 { 3897 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM); 3898 rp_saved = true; 3899 } 3900 else 3901 rp_saved = false; 3902 3903 /* Allocate the local frame and set up the frame pointer if needed. */ 3904 if (actual_fsize != 0) 3905 { 3906 if (frame_pointer_needed) 3907 { 3908 /* Copy the old frame pointer temporarily into %r1. Set up the 3909 new stack pointer, then store away the saved old frame pointer 3910 into the stack at sp and at the same time update the stack 3911 pointer by actual_fsize bytes. Two versions, first 3912 handles small (<8k) frames. The second handles large (>=8k) 3913 frames. */ 3914 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx); 3915 if (DO_FRAME_NOTES) 3916 RTX_FRAME_RELATED_P (insn) = 1; 3917 3918 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3919 if (DO_FRAME_NOTES) 3920 RTX_FRAME_RELATED_P (insn) = 1; 3921 3922 if (VAL_14_BITS_P (actual_fsize)) 3923 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize); 3924 else 3925 { 3926 /* It is incorrect to store the saved frame pointer at *sp, 3927 then increment sp (writes beyond the current stack boundary). 3928 3929 So instead use stwm to store at *sp and post-increment the 3930 stack pointer as an atomic operation. Then increment sp to 3931 finish allocating the new frame. */ 3932 HOST_WIDE_INT adjust1 = 8192 - 64; 3933 HOST_WIDE_INT adjust2 = actual_fsize - adjust1; 3934 3935 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1); 3936 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3937 adjust2, 1); 3938 } 3939 3940 /* We set SAVE_SP in frames that need a frame pointer. Thus, 3941 we need to store the previous stack pointer (frame pointer) 3942 into the frame marker on targets that use the HP unwind 3943 library. This allows the HP unwind library to be used to 3944 unwind GCC frames. However, we are not fully compatible 3945 with the HP library because our frame layout differs from 3946 that specified in the HP runtime specification. 3947 3948 We don't want a frame note on this instruction as the frame 3949 marker moves during dynamic stack allocation. 
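(The previous-SP slot of the fixed frame marker sits at sp - 4, or sp - 8 in the 64-bit runtime; see the store just below.)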
3950 3951 This instruction also serves as a blockage to prevent 3952 register spills from being scheduled before the stack 3953 pointer is raised. This is necessary as we store 3954 registers using the frame pointer as a base register, 3955 and the frame pointer is set before sp is raised. */ 3956 if (TARGET_HPUX_UNWIND_LIBRARY) 3957 { 3958 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, 3959 GEN_INT (TARGET_64BIT ? -8 : -4)); 3960 3961 emit_move_insn (gen_rtx_MEM (word_mode, addr), 3962 hard_frame_pointer_rtx); 3963 } 3964 else 3965 emit_insn (gen_blockage ()); 3966 } 3967 /* no frame pointer needed. */ 3968 else 3969 { 3970 /* In some cases we can perform the first callee register save 3971 and allocating the stack frame at the same time. If so, just 3972 make a note of it and defer allocating the frame until saving 3973 the callee registers. */ 3974 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0) 3975 merge_sp_adjust_with_store = 1; 3976 /* Can not optimize. Adjust the stack frame by actual_fsize 3977 bytes. */ 3978 else 3979 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 3980 actual_fsize, 1); 3981 } 3982 } 3983 3984 /* Normal register save. 3985 3986 Do not save the frame pointer in the frame_pointer_needed case. It 3987 was done earlier. */ 3988 if (frame_pointer_needed) 3989 { 3990 offset = local_fsize; 3991 3992 /* Saving the EH return data registers in the frame is the simplest 3993 way to get the frame unwind information emitted. We put them 3994 just before the general registers. */ 3995 if (DO_FRAME_NOTES && crtl->calls_eh_return) 3996 { 3997 unsigned int i, regno; 3998 3999 for (i = 0; ; ++i) 4000 { 4001 regno = EH_RETURN_DATA_REGNO (i); 4002 if (regno == INVALID_REGNUM) 4003 break; 4004 4005 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4006 offset += UNITS_PER_WORD; 4007 } 4008 } 4009 4010 for (i = 18; i >= 4; i--) 4011 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4012 { 4013 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4014 offset += UNITS_PER_WORD; 4015 gr_saved++; 4016 } 4017 /* Account for %r3 which is saved in a special place. */ 4018 gr_saved++; 4019 } 4020 /* No frame pointer needed. */ 4021 else 4022 { 4023 offset = local_fsize - actual_fsize; 4024 4025 /* Saving the EH return data registers in the frame is the simplest 4026 way to get the frame unwind information emitted. */ 4027 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4028 { 4029 unsigned int i, regno; 4030 4031 for (i = 0; ; ++i) 4032 { 4033 regno = EH_RETURN_DATA_REGNO (i); 4034 if (regno == INVALID_REGNUM) 4035 break; 4036 4037 /* If merge_sp_adjust_with_store is nonzero, then we can 4038 optimize the first save. */ 4039 if (merge_sp_adjust_with_store) 4040 { 4041 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset); 4042 merge_sp_adjust_with_store = 0; 4043 } 4044 else 4045 store_reg (regno, offset, STACK_POINTER_REGNUM); 4046 offset += UNITS_PER_WORD; 4047 } 4048 } 4049 4050 for (i = 18; i >= 3; i--) 4051 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4052 { 4053 /* If merge_sp_adjust_with_store is nonzero, then we can 4054 optimize the first GR save. */ 4055 if (merge_sp_adjust_with_store) 4056 { 4057 store_reg_modify (STACK_POINTER_REGNUM, i, -offset); 4058 merge_sp_adjust_with_store = 0; 4059 } 4060 else 4061 store_reg (i, offset, STACK_POINTER_REGNUM); 4062 offset += UNITS_PER_WORD; 4063 gr_saved++; 4064 } 4065 4066 /* If we wanted to merge the SP adjustment with a GR save, but we never 4067 did any GR saves, then just emit the adjustment here. 
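This happens, for instance, when the function needs a frame for outgoing arguments but saves no call-saved general registers.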
*/ 4068 if (merge_sp_adjust_with_store) 4069 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4070 actual_fsize, 1); 4071 } 4072 4073 /* The hppa calling conventions say that %r19, the pic offset 4074 register, is saved at sp - 32 (in this function's frame) 4075 when generating PIC code. FIXME: What is the correct thing 4076 to do for functions which make no calls and allocate no 4077 frame? Do we need to allocate a frame, or can we just omit 4078 the save? For now we'll just omit the save. 4079 4080 We don't want a note on this insn as the frame marker can 4081 move if there is a dynamic stack allocation. */ 4082 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT) 4083 { 4084 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32)); 4085 4086 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx); 4087 4088 } 4089 4090 /* Align pointer properly (doubleword boundary). */ 4091 offset = (offset + 7) & ~7; 4092 4093 /* Floating point register store. */ 4094 if (save_fregs) 4095 { 4096 rtx base; 4097 4098 /* First get the frame or stack pointer to the start of the FP register 4099 save area. */ 4100 if (frame_pointer_needed) 4101 { 4102 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4103 base = hard_frame_pointer_rtx; 4104 } 4105 else 4106 { 4107 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4108 base = stack_pointer_rtx; 4109 } 4110 4111 /* Now actually save the FP registers. */ 4112 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4113 { 4114 if (df_regs_ever_live_p (i) 4115 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4116 { 4117 rtx addr, reg; 4118 rtx_insn *insn; 4119 addr = gen_rtx_MEM (DFmode, 4120 gen_rtx_POST_INC (word_mode, tmpreg)); 4121 reg = gen_rtx_REG (DFmode, i); 4122 insn = emit_move_insn (addr, reg); 4123 if (DO_FRAME_NOTES) 4124 { 4125 RTX_FRAME_RELATED_P (insn) = 1; 4126 if (TARGET_64BIT) 4127 { 4128 rtx mem = gen_rtx_MEM (DFmode, 4129 plus_constant (Pmode, base, 4130 offset)); 4131 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4132 gen_rtx_SET (VOIDmode, mem, reg)); 4133 } 4134 else 4135 { 4136 rtx meml = gen_rtx_MEM (SFmode, 4137 plus_constant (Pmode, base, 4138 offset)); 4139 rtx memr = gen_rtx_MEM (SFmode, 4140 plus_constant (Pmode, base, 4141 offset + 4)); 4142 rtx regl = gen_rtx_REG (SFmode, i); 4143 rtx regr = gen_rtx_REG (SFmode, i + 1); 4144 rtx setl = gen_rtx_SET (VOIDmode, meml, regl); 4145 rtx setr = gen_rtx_SET (VOIDmode, memr, regr); 4146 rtvec vec; 4147 4148 RTX_FRAME_RELATED_P (setl) = 1; 4149 RTX_FRAME_RELATED_P (setr) = 1; 4150 vec = gen_rtvec (2, setl, setr); 4151 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4152 gen_rtx_SEQUENCE (VOIDmode, vec)); 4153 } 4154 } 4155 offset += GET_MODE_SIZE (DFmode); 4156 fr_saved++; 4157 } 4158 } 4159 } 4160} 4161 4162/* Emit RTL to load REG from the memory location specified by BASE+DISP. 4163 Handle case where DISP > 8k by using the add_high_const patterns. 
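As in store_reg above, the 32-bit DISP > 8k case leaves the high part of the address in %r1.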
*/ 4164 4165static void 4166load_reg (int reg, HOST_WIDE_INT disp, int base) 4167{ 4168 rtx dest = gen_rtx_REG (word_mode, reg); 4169 rtx basereg = gen_rtx_REG (Pmode, base); 4170 rtx src; 4171 4172 if (VAL_14_BITS_P (disp)) 4173 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp)); 4174 else if (TARGET_64BIT && !VAL_32_BITS_P (disp)) 4175 { 4176 rtx delta = GEN_INT (disp); 4177 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4178 4179 emit_move_insn (tmpreg, delta); 4180 if (TARGET_DISABLE_INDEXING) 4181 { 4182 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4183 src = gen_rtx_MEM (word_mode, tmpreg); 4184 } 4185 else 4186 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg)); 4187 } 4188 else 4189 { 4190 rtx delta = GEN_INT (disp); 4191 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta)); 4192 rtx tmpreg = gen_rtx_REG (Pmode, 1); 4193 4194 emit_move_insn (tmpreg, high); 4195 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta)); 4196 } 4197 4198 emit_move_insn (dest, src); 4199} 4200 4201/* Update the total code bytes output to the text section. */ 4202 4203static void 4204update_total_code_bytes (unsigned int nbytes) 4205{ 4206 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM) 4207 && !IN_NAMED_SECTION_P (cfun->decl)) 4208 { 4209 unsigned int old_total = total_code_bytes; 4210 4211 total_code_bytes += nbytes; 4212 4213 /* Be prepared to handle overflows. */ 4214 if (old_total > total_code_bytes) 4215 total_code_bytes = UINT_MAX; 4216 } 4217} 4218 4219/* This function generates the assembly code for function exit. 4220 Args are as for output_function_prologue (). 4221 4222 The function epilogue should not depend on the current stack 4223 pointer! It should use the frame pointer only. This is mandatory 4224 because of alloca; we also take advantage of it to omit stack 4225 adjustments before returning. */ 4226 4227static void 4228pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4229{ 4230 rtx_insn *insn = get_last_insn (); 4231 bool extra_nop; 4232 4233 /* pa_expand_epilogue does the dirty work now. We just need 4234 to output the assembler directives which denote the end 4235 of a function. 4236 4237 To make debuggers happy, emit a nop if the epilogue was completely 4238 eliminated due to a volatile call as the last insn in the 4239 current function. That way the return address (in %r2) will 4240 always point to a valid instruction in the current function. */ 4241 4242 /* Get the last real insn. */ 4243 if (NOTE_P (insn)) 4244 insn = prev_real_insn (insn); 4245 4246 /* If it is a sequence, then look inside. */ 4247 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) 4248 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0); 4249 4250 /* If insn is a CALL_INSN, then it must be a call to a volatile 4251 function (otherwise there would be epilogue insns). */ 4252 if (insn && CALL_P (insn)) 4253 { 4254 fputs ("\tnop\n", file); 4255 extra_nop = true; 4256 } 4257 else 4258 extra_nop = false; 4259 4260 fputs ("\t.EXIT\n\t.PROCEND\n", file); 4261 4262 if (TARGET_SOM && TARGET_GAS) 4263 { 4264 /* We are done with this subspace except possibly for some additional 4265 debug information. Forget that we are in this subspace to ensure 4266 that the next function is output in its own subspace. */ 4267 in_section = NULL; 4268 cfun->machine->in_nsubspa = 2; 4269 } 4270 4271 /* Thunks do their own insn accounting. 
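(pa_asm_output_mi_thunk tracks the size of the code it emits and updates the totals itself.)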
*/ 4272 if (cfun->is_thunk) 4273 return; 4274 4275 if (INSN_ADDRESSES_SET_P ()) 4276 { 4277 last_address = extra_nop ? 4 : 0; 4278 insn = get_last_nonnote_insn (); 4279 if (insn) 4280 { 4281 last_address += INSN_ADDRESSES (INSN_UID (insn)); 4282 if (INSN_P (insn)) 4283 last_address += insn_default_length (insn); 4284 } 4285 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 4286 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 4287 } 4288 else 4289 last_address = UINT_MAX; 4290 4291 /* Finally, update the total number of code bytes output so far. */ 4292 update_total_code_bytes (last_address); 4293} 4294 4295void 4296pa_expand_epilogue (void) 4297{ 4298 rtx tmpreg; 4299 HOST_WIDE_INT offset; 4300 HOST_WIDE_INT ret_off = 0; 4301 int i; 4302 int merge_sp_adjust_with_load = 0; 4303 4304 /* We will use this often. */ 4305 tmpreg = gen_rtx_REG (word_mode, 1); 4306 4307 /* Try to restore RP early to avoid load/use interlocks when 4308 RP gets used in the return (bv) instruction. This appears to still 4309 be necessary even when we schedule the prologue and epilogue. */ 4310 if (rp_saved) 4311 { 4312 ret_off = TARGET_64BIT ? -16 : -20; 4313 if (frame_pointer_needed) 4314 { 4315 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM); 4316 ret_off = 0; 4317 } 4318 else 4319 { 4320 /* No frame pointer, and stack is smaller than 8k. */ 4321 if (VAL_14_BITS_P (ret_off - actual_fsize)) 4322 { 4323 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM); 4324 ret_off = 0; 4325 } 4326 } 4327 } 4328 4329 /* General register restores. */ 4330 if (frame_pointer_needed) 4331 { 4332 offset = local_fsize; 4333 4334 /* If the current function calls __builtin_eh_return, then we need 4335 to restore the saved EH data registers. */ 4336 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4337 { 4338 unsigned int i, regno; 4339 4340 for (i = 0; ; ++i) 4341 { 4342 regno = EH_RETURN_DATA_REGNO (i); 4343 if (regno == INVALID_REGNUM) 4344 break; 4345 4346 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM); 4347 offset += UNITS_PER_WORD; 4348 } 4349 } 4350 4351 for (i = 18; i >= 4; i--) 4352 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4353 { 4354 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM); 4355 offset += UNITS_PER_WORD; 4356 } 4357 } 4358 else 4359 { 4360 offset = local_fsize - actual_fsize; 4361 4362 /* If the current function calls __builtin_eh_return, then we need 4363 to restore the saved EH data registers. */ 4364 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4365 { 4366 unsigned int i, regno; 4367 4368 for (i = 0; ; ++i) 4369 { 4370 regno = EH_RETURN_DATA_REGNO (i); 4371 if (regno == INVALID_REGNUM) 4372 break; 4373 4374 /* Only for the first load. 4375 merge_sp_adjust_with_load holds the register load 4376 with which we will merge the sp adjustment. */ 4377 if (merge_sp_adjust_with_load == 0 4378 && local_fsize == 0 4379 && VAL_14_BITS_P (-actual_fsize)) 4380 merge_sp_adjust_with_load = regno; 4381 else 4382 load_reg (regno, offset, STACK_POINTER_REGNUM); 4383 offset += UNITS_PER_WORD; 4384 } 4385 } 4386 4387 for (i = 18; i >= 3; i--) 4388 { 4389 if (df_regs_ever_live_p (i) && ! call_used_regs[i]) 4390 { 4391 /* Only for the first load. 4392 merge_sp_adjust_with_load holds the register load 4393 with which we will merge the sp adjustment. 
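The deferred restore is emitted at the end of the epilogue as a single load that also pops the frame; see the gen_pre_load call below.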
*/ 4394 if (merge_sp_adjust_with_load == 0 4395 && local_fsize == 0 4396 && VAL_14_BITS_P (-actual_fsize)) 4397 merge_sp_adjust_with_load = i; 4398 else 4399 load_reg (i, offset, STACK_POINTER_REGNUM); 4400 offset += UNITS_PER_WORD; 4401 } 4402 } 4403 } 4404 4405 /* Align pointer properly (doubleword boundary). */ 4406 offset = (offset + 7) & ~7; 4407 4408 /* FP register restores. */ 4409 if (save_fregs) 4410 { 4411 /* Adjust the register to index off of. */ 4412 if (frame_pointer_needed) 4413 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0); 4414 else 4415 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0); 4416 4417 /* Actually do the restores now. */ 4418 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP) 4419 if (df_regs_ever_live_p (i) 4420 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1))) 4421 { 4422 rtx src = gen_rtx_MEM (DFmode, 4423 gen_rtx_POST_INC (word_mode, tmpreg)); 4424 rtx dest = gen_rtx_REG (DFmode, i); 4425 emit_move_insn (dest, src); 4426 } 4427 } 4428 4429 /* Emit a blockage insn here to keep these insns from being moved to 4430 an earlier spot in the epilogue, or into the main instruction stream. 4431 4432 This is necessary as we must not cut the stack back before all the 4433 restores are finished. */ 4434 emit_insn (gen_blockage ()); 4435 4436 /* Reset stack pointer (and possibly frame pointer). The stack 4437 pointer is initially set to fp + 64 to avoid a race condition. */ 4438 if (frame_pointer_needed) 4439 { 4440 rtx delta = GEN_INT (-64); 4441 4442 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0); 4443 emit_insn (gen_pre_load (hard_frame_pointer_rtx, 4444 stack_pointer_rtx, delta)); 4445 } 4446 /* If we were deferring a callee register restore, do it now. */ 4447 else if (merge_sp_adjust_with_load) 4448 { 4449 rtx delta = GEN_INT (-actual_fsize); 4450 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load); 4451 4452 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta)); 4453 } 4454 else if (actual_fsize != 0) 4455 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM, 4456 - actual_fsize, 0); 4457 4458 /* If we haven't restored %r2 yet (no frame pointer, and a stack 4459 frame greater than 8k), do so now. */ 4460 if (ret_off != 0) 4461 load_reg (2, ret_off, STACK_POINTER_REGNUM); 4462 4463 if (DO_FRAME_NOTES && crtl->calls_eh_return) 4464 { 4465 rtx sa = EH_RETURN_STACKADJ_RTX; 4466 4467 emit_insn (gen_blockage ()); 4468 emit_insn (TARGET_64BIT 4469 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa) 4470 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa)); 4471 } 4472} 4473 4474bool 4475pa_can_use_return_insn (void) 4476{ 4477 if (!reload_completed) 4478 return false; 4479 4480 if (frame_pointer_needed) 4481 return false; 4482 4483 if (df_regs_ever_live_p (2)) 4484 return false; 4485 4486 if (crtl->profile) 4487 return false; 4488 4489 return pa_compute_frame_size (get_frame_size (), 0) == 0; 4490} 4491 4492rtx 4493hppa_pic_save_rtx (void) 4494{ 4495 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM); 4496} 4497 4498#ifndef NO_DEFERRED_PROFILE_COUNTERS 4499#define NO_DEFERRED_PROFILE_COUNTERS 0 4500#endif 4501 4502 4503/* Vector of funcdef numbers. */ 4504static vec<int> funcdef_nos; 4505 4506/* Output deferred profile counters. 
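Each funcdef number pushed onto funcdef_nos gets a zero-initialized counter of LONG_TYPE_SIZE bits, emitted under its local LP label in the data section.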
*/ 4507static void 4508output_deferred_profile_counters (void) 4509{ 4510 unsigned int i; 4511 int align, n; 4512 4513 if (funcdef_nos.is_empty ()) 4514 return; 4515 4516 switch_to_section (data_section); 4517 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); 4518 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); 4519 4520 for (i = 0; funcdef_nos.iterate (i, &n); i++) 4521 { 4522 targetm.asm_out.internal_label (asm_out_file, "LP", n); 4523 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); 4524 } 4525 4526 funcdef_nos.release (); 4527} 4528 4529void 4530hppa_profile_hook (int label_no) 4531{ 4532 /* We use SImode for the address of the function in both 32 and 4533 64-bit code to avoid having to provide DImode versions of the 4534 lcla2 and load_offset_label_address insn patterns. */ 4535 rtx reg = gen_reg_rtx (SImode); 4536 rtx_code_label *label_rtx = gen_label_rtx (); 4537 rtx begin_label_rtx; 4538 rtx_insn *call_insn; 4539 char begin_label_name[16]; 4540 4541 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, 4542 label_no); 4543 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); 4544 4545 if (TARGET_64BIT) 4546 emit_move_insn (arg_pointer_rtx, 4547 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, 4548 GEN_INT (64))); 4549 4550 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); 4551 4552 /* The address of the function is loaded into %r25 with an instruction- 4553 relative sequence that avoids the use of relocations. The sequence 4554 is split so that the load_offset_label_address instruction can 4555 occupy the delay slot of the call to _mcount. */ 4556 if (TARGET_PA_20) 4557 emit_insn (gen_lcla2 (reg, label_rtx)); 4558 else 4559 emit_insn (gen_lcla1 (reg, label_rtx)); 4560 4561 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), 4562 reg, begin_label_rtx, label_rtx)); 4563 4564#if !NO_DEFERRED_PROFILE_COUNTERS 4565 { 4566 rtx count_label_rtx, addr, r24; 4567 char count_label_name[16]; 4568 4569 funcdef_nos.safe_push (label_no); 4570 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); 4571 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); 4572 4573 addr = force_reg (Pmode, count_label_rtx); 4574 r24 = gen_rtx_REG (Pmode, 24); 4575 emit_move_insn (r24, addr); 4576 4577 call_insn = 4578 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4579 gen_rtx_SYMBOL_REF (Pmode, 4580 "_mcount")), 4581 GEN_INT (TARGET_64BIT ? 24 : 12))); 4582 4583 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); 4584 } 4585#else 4586 4587 call_insn = 4588 emit_call_insn (gen_call (gen_rtx_MEM (Pmode, 4589 gen_rtx_SYMBOL_REF (Pmode, 4590 "_mcount")), 4591 GEN_INT (TARGET_64BIT ? 16 : 8))); 4592 4593#endif 4594 4595 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); 4596 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); 4597 4598 /* Indicate the _mcount call cannot throw, nor will it execute a 4599 non-local goto. */ 4600 make_reg_eh_region_note_nothrow_nononlocal (call_insn); 4601} 4602 4603/* Fetch the return address for the frame COUNT steps up from 4604 the current frame, after the prologue. FRAMEADDR is the 4605 frame pointer of the COUNT frame. 4606 4607 We want to ignore any export stub remnants here. To handle this, 4608 we examine the code at the return address, and if it is an export 4609 stub, we return a memory rtx for the stub return address stored 4610 at frame-24. 
4611 4612 The value returned is used in two different ways: 4613 4614 1. To find a function's caller. 4615 4616 2. To change the return address for a function. 4617 4618 This function handles most instances of case 1; however, it will 4619 fail if there are two levels of stubs to execute on the return 4620 path. The only way I believe that can happen is if the return value 4621 needs a parameter relocation, which never happens for C code. 4622 4623 This function handles most instances of case 2; however, it will 4624 fail if we did not originally have stub code on the return path 4625 but will need stub code on the new return path. This can happen if 4626 the caller & callee are both in the main program, but the new 4627 return location is in a shared library. */ 4628 4629rtx 4630pa_return_addr_rtx (int count, rtx frameaddr) 4631{ 4632 rtx label; 4633 rtx rp; 4634 rtx saved_rp; 4635 rtx ins; 4636 4637 /* The instruction stream at the return address of a PA1.X export stub is: 4638 4639 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4640 0x004010a1 | stub+12: ldsid (sr0,rp),r1 4641 0x00011820 | stub+16: mtsp r1,sr0 4642 0xe0400002 | stub+20: be,n 0(sr0,rp) 4643 4644 0xe0400002 must be specified as -532676606 so that it won't be 4645 rejected as an invalid immediate operand on 64-bit hosts. 4646 4647 The instruction stream at the return address of a PA2.0 export stub is: 4648 4649 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp 4650 0xe840d002 | stub+12: bve,n (rp) 4651 */ 4652 4653 HOST_WIDE_INT insns[4]; 4654 int i, len; 4655 4656 if (count != 0) 4657 return NULL_RTX; 4658 4659 rp = get_hard_reg_initial_val (Pmode, 2); 4660 4661 if (TARGET_64BIT || TARGET_NO_SPACE_REGS) 4662 return rp; 4663 4664 /* If there is no export stub then just use the value saved from 4665 the return pointer register. */ 4666 4667 saved_rp = gen_reg_rtx (Pmode); 4668 emit_move_insn (saved_rp, rp); 4669 4670 /* Get pointer to the instruction stream. We have to mask out the 4671 privilege level from the two low order bits of the return address 4672 pointer here so that ins will point to the start of the first 4673 instruction that would have been executed if we returned. */ 4674 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR)); 4675 label = gen_label_rtx (); 4676 4677 if (TARGET_PA_20) 4678 { 4679 insns[0] = 0x4bc23fd1; 4680 insns[1] = -398405630; 4681 len = 2; 4682 } 4683 else 4684 { 4685 insns[0] = 0x4bc23fd1; 4686 insns[1] = 0x004010a1; 4687 insns[2] = 0x00011820; 4688 insns[3] = -532676606; 4689 len = 4; 4690 } 4691 4692 /* Check the instruction stream at the normal return address for the 4693 export stub. If it is an export stub, then our return address is 4694 really in -24[frameaddr]. */ 4695 4696 for (i = 0; i < len; i++) 4697 { 4698 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4)); 4699 rtx op1 = GEN_INT (insns[i]); 4700 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label); 4701 } 4702 4703 /* Here we know that our return address points to an export 4704 stub. We don't want to return the address of the export stub, 4705 but rather the return address of the export stub. That return 4706 address is stored at -24[frameaddr].
*/ 4707 4708 emit_move_insn (saved_rp, 4709 gen_rtx_MEM (Pmode, 4710 memory_address (Pmode, 4711 plus_constant (Pmode, frameaddr, 4712 -24)))); 4713 4714 emit_label (label); 4715 4716 return saved_rp; 4717} 4718 4719void 4720pa_emit_bcond_fp (rtx operands[]) 4721{ 4722 enum rtx_code code = GET_CODE (operands[0]); 4723 rtx operand0 = operands[1]; 4724 rtx operand1 = operands[2]; 4725 rtx label = operands[3]; 4726 4727 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), 4728 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1))); 4729 4730 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 4731 gen_rtx_IF_THEN_ELSE (VOIDmode, 4732 gen_rtx_fmt_ee (NE, 4733 VOIDmode, 4734 gen_rtx_REG (CCFPmode, 0), 4735 const0_rtx), 4736 gen_rtx_LABEL_REF (VOIDmode, label), 4737 pc_rtx))); 4738 4739} 4740 4741/* Adjust the cost of a scheduling dependency. Return the new cost of 4742 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 4743 4744static int 4745pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost) 4746{ 4747 enum attr_type attr_type; 4748 4749 /* Don't adjust costs for a pa8000 chip, also do not adjust any 4750 true dependencies as they are described with bypasses now. */ 4751 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0) 4752 return cost; 4753 4754 if (! recog_memoized (insn)) 4755 return 0; 4756 4757 attr_type = get_attr_type (insn); 4758 4759 switch (REG_NOTE_KIND (link)) 4760 { 4761 case REG_DEP_ANTI: 4762 /* Anti dependency; DEP_INSN reads a register that INSN writes some 4763 cycles later. */ 4764 4765 if (attr_type == TYPE_FPLOAD) 4766 { 4767 rtx pat = PATTERN (insn); 4768 rtx dep_pat = PATTERN (dep_insn); 4769 if (GET_CODE (pat) == PARALLEL) 4770 { 4771 /* This happens for the fldXs,mb patterns. */ 4772 pat = XVECEXP (pat, 0, 0); 4773 } 4774 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4775 /* If this happens, we have to extend this to schedule 4776 optimally. Return 0 for now. */ 4777 return 0; 4778 4779 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4780 { 4781 if (! recog_memoized (dep_insn)) 4782 return 0; 4783 switch (get_attr_type (dep_insn)) 4784 { 4785 case TYPE_FPALU: 4786 case TYPE_FPMULSGL: 4787 case TYPE_FPMULDBL: 4788 case TYPE_FPDIVSGL: 4789 case TYPE_FPDIVDBL: 4790 case TYPE_FPSQRTSGL: 4791 case TYPE_FPSQRTDBL: 4792 /* A fpload can't be issued until one cycle before a 4793 preceding arithmetic operation has finished if 4794 the target of the fpload is any of the sources 4795 (or destination) of the arithmetic operation. */ 4796 return insn_default_latency (dep_insn) - 1; 4797 4798 default: 4799 return 0; 4800 } 4801 } 4802 } 4803 else if (attr_type == TYPE_FPALU) 4804 { 4805 rtx pat = PATTERN (insn); 4806 rtx dep_pat = PATTERN (dep_insn); 4807 if (GET_CODE (pat) == PARALLEL) 4808 { 4809 /* This happens for the fldXs,mb patterns. */ 4810 pat = XVECEXP (pat, 0, 0); 4811 } 4812 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4813 /* If this happens, we have to extend this to schedule 4814 optimally. Return 0 for now. */ 4815 return 0; 4816 4817 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) 4818 { 4819 if (! 
recog_memoized (dep_insn)) 4820 return 0; 4821 switch (get_attr_type (dep_insn)) 4822 { 4823 case TYPE_FPDIVSGL: 4824 case TYPE_FPDIVDBL: 4825 case TYPE_FPSQRTSGL: 4826 case TYPE_FPSQRTDBL: 4827 /* An ALU flop can't be issued until two cycles before a 4828 preceding divide or sqrt operation has finished if 4829 the target of the ALU flop is any of the sources 4830 (or destination) of the divide or sqrt operation. */ 4831 return insn_default_latency (dep_insn) - 2; 4832 4833 default: 4834 return 0; 4835 } 4836 } 4837 } 4838 4839 /* For other anti dependencies, the cost is 0. */ 4840 return 0; 4841 4842 case REG_DEP_OUTPUT: 4843 /* Output dependency; DEP_INSN writes a register that INSN writes some 4844 cycles later. */ 4845 if (attr_type == TYPE_FPLOAD) 4846 { 4847 rtx pat = PATTERN (insn); 4848 rtx dep_pat = PATTERN (dep_insn); 4849 if (GET_CODE (pat) == PARALLEL) 4850 { 4851 /* This happens for the fldXs,mb patterns. */ 4852 pat = XVECEXP (pat, 0, 0); 4853 } 4854 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4855 /* If this happens, we have to extend this to schedule 4856 optimally. Return 0 for now. */ 4857 return 0; 4858 4859 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4860 { 4861 if (! recog_memoized (dep_insn)) 4862 return 0; 4863 switch (get_attr_type (dep_insn)) 4864 { 4865 case TYPE_FPALU: 4866 case TYPE_FPMULSGL: 4867 case TYPE_FPMULDBL: 4868 case TYPE_FPDIVSGL: 4869 case TYPE_FPDIVDBL: 4870 case TYPE_FPSQRTSGL: 4871 case TYPE_FPSQRTDBL: 4872 /* A fpload can't be issued until one cycle before a 4873 preceding arithmetic operation has finished if 4874 the target of the fpload is the destination of the 4875 arithmetic operation. 4876 4877 Exception: For PA7100LC, PA7200 and PA7300, the cost 4878 is 3 cycles, unless they bundle together. We also 4879 pay the penalty if the second insn is a fpload. */ 4880 return insn_default_latency (dep_insn) - 1; 4881 4882 default: 4883 return 0; 4884 } 4885 } 4886 } 4887 else if (attr_type == TYPE_FPALU) 4888 { 4889 rtx pat = PATTERN (insn); 4890 rtx dep_pat = PATTERN (dep_insn); 4891 if (GET_CODE (pat) == PARALLEL) 4892 { 4893 /* This happens for the fldXs,mb patterns. */ 4894 pat = XVECEXP (pat, 0, 0); 4895 } 4896 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 4897 /* If this happens, we have to extend this to schedule 4898 optimally. Return 0 for now. */ 4899 return 0; 4900 4901 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) 4902 { 4903 if (! recog_memoized (dep_insn)) 4904 return 0; 4905 switch (get_attr_type (dep_insn)) 4906 { 4907 case TYPE_FPDIVSGL: 4908 case TYPE_FPDIVDBL: 4909 case TYPE_FPSQRTSGL: 4910 case TYPE_FPSQRTDBL: 4911 /* An ALU flop can't be issued until two cycles before a 4912 preceding divide or sqrt operation has finished if 4913 the target of the ALU flop is also the target of 4914 the divide or sqrt operation. */ 4915 return insn_default_latency (dep_insn) - 2; 4916 4917 default: 4918 return 0; 4919 } 4920 } 4921 } 4922 4923 /* For other output dependencies, the cost is 0. */ 4924 return 0; 4925 4926 default: 4927 gcc_unreachable (); 4928 } 4929} 4930 4931/* Adjust scheduling priorities. We use this to try and keep addil 4932 and the next use of %r1 close together. */ 4933static int 4934pa_adjust_priority (rtx_insn *insn, int priority) 4935{ 4936 rtx set = single_set (insn); 4937 rtx src, dest; 4938 if (set) 4939 { 4940 src = SET_SRC (set); 4941 dest = SET_DEST (set); 4942 if (GET_CODE (src) == LO_SUM 4943 && symbolic_operand (XEXP (src, 1), VOIDmode) 4944 && ! 
read_only_operand (XEXP (src, 1), VOIDmode)) 4945 priority >>= 3; 4946 4947 else if (GET_CODE (src) == MEM 4948 && GET_CODE (XEXP (src, 0)) == LO_SUM 4949 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode) 4950 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode)) 4951 priority >>= 1; 4952 4953 else if (GET_CODE (dest) == MEM 4954 && GET_CODE (XEXP (dest, 0)) == LO_SUM 4955 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode) 4956 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)) 4957 priority >>= 3; 4958 } 4959 return priority; 4960} 4961 4962/* The 700 can only issue a single insn at a time. 4963 The 7XXX processors can issue two insns at a time. 4964 The 8000 can issue 4 insns at a time. */ 4965static int 4966pa_issue_rate (void) 4967{ 4968 switch (pa_cpu) 4969 { 4970 case PROCESSOR_700: return 1; 4971 case PROCESSOR_7100: return 2; 4972 case PROCESSOR_7100LC: return 2; 4973 case PROCESSOR_7200: return 2; 4974 case PROCESSOR_7300: return 2; 4975 case PROCESSOR_8000: return 4; 4976 4977 default: 4978 gcc_unreachable (); 4979 } 4980} 4981 4982 4983 4984/* Return any length plus adjustment needed by INSN which already has 4985 its length computed as LENGTH. Return LENGTH if no adjustment is 4986 necessary. 4987 4988 Also compute the length of an inline block move here as it is too 4989 complicated to express as a length attribute in pa.md. */ 4990int 4991pa_adjust_insn_length (rtx_insn *insn, int length) 4992{ 4993 rtx pat = PATTERN (insn); 4994 4995 /* If length is negative or undefined, provide initial length. */ 4996 if ((unsigned int) length >= INT_MAX) 4997 { 4998 if (GET_CODE (pat) == SEQUENCE) 4999 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0)); 5000 5001 switch (get_attr_type (insn)) 5002 { 5003 case TYPE_MILLI: 5004 length = pa_attr_length_millicode_call (insn); 5005 break; 5006 case TYPE_CALL: 5007 length = pa_attr_length_call (insn, 0); 5008 break; 5009 case TYPE_SIBCALL: 5010 length = pa_attr_length_call (insn, 1); 5011 break; 5012 case TYPE_DYNCALL: 5013 length = pa_attr_length_indirect_call (insn); 5014 break; 5015 case TYPE_SH_FUNC_ADRS: 5016 length = pa_attr_length_millicode_call (insn) + 20; 5017 break; 5018 default: 5019 gcc_unreachable (); 5020 } 5021 } 5022 5023 /* Block move pattern. */ 5024 if (NONJUMP_INSN_P (insn) 5025 && GET_CODE (pat) == PARALLEL 5026 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 5027 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 5028 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM 5029 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode 5030 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) 5031 length += compute_movmem_length (insn) - 4; 5032 /* Block clear pattern. */ 5033 else if (NONJUMP_INSN_P (insn) 5034 && GET_CODE (pat) == PARALLEL 5035 && GET_CODE (XVECEXP (pat, 0, 0)) == SET 5036 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM 5037 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx 5038 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode) 5039 length += compute_clrmem_length (insn) - 4; 5040 /* Conditional branch with an unfilled delay slot. */ 5041 else if (JUMP_P (insn) && ! simplejump_p (insn)) 5042 { 5043 /* Adjust a short backwards conditional with an unfilled delay slot. */ 5044 if (GET_CODE (pat) == SET 5045 && length == 4 5046 && JUMP_LABEL (insn) != NULL_RTX 5047 && ! 
forward_branch_p (insn)) 5048 length += 4; 5049 else if (GET_CODE (pat) == PARALLEL 5050 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH 5051 && length == 4) 5052 length += 4; 5053 /* Adjust dbra insn with short backwards conditional branch with 5054 unfilled delay slot -- only for the case where the counter is in a 5055 general register. */ 5056 else if (GET_CODE (pat) == PARALLEL 5057 && GET_CODE (XVECEXP (pat, 0, 1)) == SET 5058 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG 5059 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) 5060 && length == 4 5061 && ! forward_branch_p (insn)) 5062 length += 4; 5063 } 5064 return length; 5065} 5066 5067/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */ 5068 5069static bool 5070pa_print_operand_punct_valid_p (unsigned char code) 5071{ 5072 if (code == '@' 5073 || code == '#' 5074 || code == '*' 5075 || code == '^') 5076 return true; 5077 5078 return false; 5079} 5080 5081/* Print operand X (an rtx) in assembler syntax to file FILE. 5082 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 5083 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 5084 5085void 5086pa_print_operand (FILE *file, rtx x, int code) 5087{ 5088 switch (code) 5089 { 5090 case '#': 5091 /* Output a 'nop' if there's nothing for the delay slot. */ 5092 if (dbr_sequence_length () == 0) 5093 fputs ("\n\tnop", file); 5094 return; 5095 case '*': 5096 /* Output a nullification completer if there's nothing for the 5097 delay slot or nullification is requested. */ 5098 if (dbr_sequence_length () == 0 || 5099 (final_sequence && 5100 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) 5101 fputs (",n", file); 5102 return; 5103 case 'R': 5104 /* Print out the second register name of a register pair. 5105 I.e., R (6) => 7. */ 5106 fputs (reg_names[REGNO (x) + 1], file); 5107 return; 5108 case 'r': 5109 /* A register or zero. */ 5110 if (x == const0_rtx 5111 || (x == CONST0_RTX (DFmode)) 5112 || (x == CONST0_RTX (SFmode))) 5113 { 5114 fputs ("%r0", file); 5115 return; 5116 } 5117 else 5118 break; 5119 case 'f': 5120 /* A register or zero (floating point).
*/ 5121 if (x == const0_rtx 5122 || (x == CONST0_RTX (DFmode)) 5123 || (x == CONST0_RTX (SFmode))) 5124 { 5125 fputs ("%fr0", file); 5126 return; 5127 } 5128 else 5129 break; 5130 case 'A': 5131 { 5132 rtx xoperands[2]; 5133 5134 xoperands[0] = XEXP (XEXP (x, 0), 0); 5135 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0); 5136 pa_output_global_address (file, xoperands[1], 0); 5137 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]); 5138 return; 5139 } 5140 5141 case 'C': /* Plain (C)ondition */ 5142 case 'X': 5143 switch (GET_CODE (x)) 5144 { 5145 case EQ: 5146 fputs ("=", file); break; 5147 case NE: 5148 fputs ("<>", file); break; 5149 case GT: 5150 fputs (">", file); break; 5151 case GE: 5152 fputs (">=", file); break; 5153 case GEU: 5154 fputs (">>=", file); break; 5155 case GTU: 5156 fputs (">>", file); break; 5157 case LT: 5158 fputs ("<", file); break; 5159 case LE: 5160 fputs ("<=", file); break; 5161 case LEU: 5162 fputs ("<<=", file); break; 5163 case LTU: 5164 fputs ("<<", file); break; 5165 default: 5166 gcc_unreachable (); 5167 } 5168 return; 5169 case 'N': /* Condition, (N)egated */ 5170 switch (GET_CODE (x)) 5171 { 5172 case EQ: 5173 fputs ("<>", file); break; 5174 case NE: 5175 fputs ("=", file); break; 5176 case GT: 5177 fputs ("<=", file); break; 5178 case GE: 5179 fputs ("<", file); break; 5180 case GEU: 5181 fputs ("<<", file); break; 5182 case GTU: 5183 fputs ("<<=", file); break; 5184 case LT: 5185 fputs (">=", file); break; 5186 case LE: 5187 fputs (">", file); break; 5188 case LEU: 5189 fputs (">>", file); break; 5190 case LTU: 5191 fputs (">>=", file); break; 5192 default: 5193 gcc_unreachable (); 5194 } 5195 return; 5196 /* For floating point comparisons. Note that the output 5197 predicates are the complement of the desired mode. The 5198 conditions for GT, GE, LT, LE and LTGT cause an invalid 5199 operation exception if the result is unordered and this 5200 exception is enabled in the floating-point status register. */ 5201 case 'Y': 5202 switch (GET_CODE (x)) 5203 { 5204 case EQ: 5205 fputs ("!=", file); break; 5206 case NE: 5207 fputs ("=", file); break; 5208 case GT: 5209 fputs ("!>", file); break; 5210 case GE: 5211 fputs ("!>=", file); break; 5212 case LT: 5213 fputs ("!<", file); break; 5214 case LE: 5215 fputs ("!<=", file); break; 5216 case LTGT: 5217 fputs ("!<>", file); break; 5218 case UNLE: 5219 fputs ("!?<=", file); break; 5220 case UNLT: 5221 fputs ("!?<", file); break; 5222 case UNGE: 5223 fputs ("!?>=", file); break; 5224 case UNGT: 5225 fputs ("!?>", file); break; 5226 case UNEQ: 5227 fputs ("!?=", file); break; 5228 case UNORDERED: 5229 fputs ("!?", file); break; 5230 case ORDERED: 5231 fputs ("?", file); break; 5232 default: 5233 gcc_unreachable (); 5234 } 5235 return; 5236 case 'S': /* Condition, operands are (S)wapped. */ 5237 switch (GET_CODE (x)) 5238 { 5239 case EQ: 5240 fputs ("=", file); break; 5241 case NE: 5242 fputs ("<>", file); break; 5243 case GT: 5244 fputs ("<", file); break; 5245 case GE: 5246 fputs ("<=", file); break; 5247 case GEU: 5248 fputs ("<<=", file); break; 5249 case GTU: 5250 fputs ("<<", file); break; 5251 case LT: 5252 fputs (">", file); break; 5253 case LE: 5254 fputs (">=", file); break; 5255 case LEU: 5256 fputs (">>=", file); break; 5257 case LTU: 5258 fputs (">>", file); break; 5259 default: 5260 gcc_unreachable (); 5261 } 5262 return; 5263 case 'B': /* Condition, (B)oth swapped and negate. 
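E.g., GT prints ">=": swapped it is "<", and negating that yields ">=".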
*/ 5264 switch (GET_CODE (x)) 5265 { 5266 case EQ: 5267 fputs ("<>", file); break; 5268 case NE: 5269 fputs ("=", file); break; 5270 case GT: 5271 fputs (">=", file); break; 5272 case GE: 5273 fputs (">", file); break; 5274 case GEU: 5275 fputs (">>", file); break; 5276 case GTU: 5277 fputs (">>=", file); break; 5278 case LT: 5279 fputs ("<=", file); break; 5280 case LE: 5281 fputs ("<", file); break; 5282 case LEU: 5283 fputs ("<<", file); break; 5284 case LTU: 5285 fputs ("<<=", file); break; 5286 default: 5287 gcc_unreachable (); 5288 } 5289 return; 5290 case 'k': 5291 gcc_assert (GET_CODE (x) == CONST_INT); 5292 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); 5293 return; 5294 case 'Q': 5295 gcc_assert (GET_CODE (x) == CONST_INT); 5296 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63)); 5297 return; 5298 case 'L': 5299 gcc_assert (GET_CODE (x) == CONST_INT); 5300 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31)); 5301 return; 5302 case 'O': 5303 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0); 5304 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5305 return; 5306 case 'p': 5307 gcc_assert (GET_CODE (x) == CONST_INT); 5308 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63)); 5309 return; 5310 case 'P': 5311 gcc_assert (GET_CODE (x) == CONST_INT); 5312 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31)); 5313 return; 5314 case 'I': 5315 if (GET_CODE (x) == CONST_INT) 5316 fputs ("i", file); 5317 return; 5318 case 'M': 5319 case 'F': 5320 switch (GET_CODE (XEXP (x, 0))) 5321 { 5322 case PRE_DEC: 5323 case PRE_INC: 5324 if (ASSEMBLER_DIALECT == 0) 5325 fputs ("s,mb", file); 5326 else 5327 fputs (",mb", file); 5328 break; 5329 case POST_DEC: 5330 case POST_INC: 5331 if (ASSEMBLER_DIALECT == 0) 5332 fputs ("s,ma", file); 5333 else 5334 fputs (",ma", file); 5335 break; 5336 case PLUS: 5337 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5338 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5339 { 5340 if (ASSEMBLER_DIALECT == 0) 5341 fputs ("x", file); 5342 } 5343 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5344 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5345 { 5346 if (ASSEMBLER_DIALECT == 0) 5347 fputs ("x,s", file); 5348 else 5349 fputs (",s", file); 5350 } 5351 else if (code == 'F' && ASSEMBLER_DIALECT == 0) 5352 fputs ("s", file); 5353 break; 5354 default: 5355 if (code == 'F' && ASSEMBLER_DIALECT == 0) 5356 fputs ("s", file); 5357 break; 5358 } 5359 return; 5360 case 'G': 5361 pa_output_global_address (file, x, 0); 5362 return; 5363 case 'H': 5364 pa_output_global_address (file, x, 1); 5365 return; 5366 case 0: /* Don't do anything special */ 5367 break; 5368 case 'Z': 5369 { 5370 unsigned op[3]; 5371 compute_zdepwi_operands (INTVAL (x), op); 5372 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5373 return; 5374 } 5375 case 'z': 5376 { 5377 unsigned op[3]; 5378 compute_zdepdi_operands (INTVAL (x), op); 5379 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); 5380 return; 5381 } 5382 case 'c': 5383 /* We can get here from a .vtable_inherit due to our 5384 CONSTANT_ADDRESS_P rejecting perfectly good constant 5385 addresses. 
*/ 5386 break; 5387 default: 5388 gcc_unreachable (); 5389 } 5390 if (GET_CODE (x) == REG) 5391 { 5392 fputs (reg_names [REGNO (x)], file); 5393 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4) 5394 { 5395 fputs ("R", file); 5396 return; 5397 } 5398 if (FP_REG_P (x) 5399 && GET_MODE_SIZE (GET_MODE (x)) <= 4 5400 && (REGNO (x) & 1) == 0) 5401 fputs ("L", file); 5402 } 5403 else if (GET_CODE (x) == MEM) 5404 { 5405 int size = GET_MODE_SIZE (GET_MODE (x)); 5406 rtx base = NULL_RTX; 5407 switch (GET_CODE (XEXP (x, 0))) 5408 { 5409 case PRE_DEC: 5410 case POST_DEC: 5411 base = XEXP (XEXP (x, 0), 0); 5412 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]); 5413 break; 5414 case PRE_INC: 5415 case POST_INC: 5416 base = XEXP (XEXP (x, 0), 0); 5417 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]); 5418 break; 5419 case PLUS: 5420 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) 5421 fprintf (file, "%s(%s)", 5422 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], 5423 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); 5424 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) 5425 fprintf (file, "%s(%s)", 5426 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], 5427 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); 5428 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG 5429 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) 5430 { 5431 /* Because the REG_POINTER flag can get lost during reload, 5432 pa_legitimate_address_p canonicalizes the order of the 5433 index and base registers in the combined move patterns. */ 5434 rtx base = XEXP (XEXP (x, 0), 1); 5435 rtx index = XEXP (XEXP (x, 0), 0); 5436 5437 fprintf (file, "%s(%s)", 5438 reg_names [REGNO (index)], reg_names [REGNO (base)]); 5439 } 5440 else 5441 output_address (XEXP (x, 0)); 5442 break; 5443 default: 5444 output_address (XEXP (x, 0)); 5445 break; 5446 } 5447 } 5448 else 5449 output_addr_const (file, x); 5450} 5451 5452/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ 5453 5454void 5455pa_output_global_address (FILE *file, rtx x, int round_constant) 5456{ 5457 5458 /* Imagine (high (const (plus ...))). */ 5459 if (GET_CODE (x) == HIGH) 5460 x = XEXP (x, 0); 5461 5462 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode)) 5463 output_addr_const (file, x); 5464 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) 5465 { 5466 output_addr_const (file, x); 5467 fputs ("-$global$", file); 5468 } 5469 else if (GET_CODE (x) == CONST) 5470 { 5471 const char *sep = ""; 5472 int offset = 0; /* assembler wants -$global$ at end */ 5473 rtx base = NULL_RTX; 5474 5475 switch (GET_CODE (XEXP (XEXP (x, 0), 0))) 5476 { 5477 case LABEL_REF: 5478 case SYMBOL_REF: 5479 base = XEXP (XEXP (x, 0), 0); 5480 output_addr_const (file, base); 5481 break; 5482 case CONST_INT: 5483 offset = INTVAL (XEXP (XEXP (x, 0), 0)); 5484 break; 5485 default: 5486 gcc_unreachable (); 5487 } 5488 5489 switch (GET_CODE (XEXP (XEXP (x, 0), 1))) 5490 { 5491 case LABEL_REF: 5492 case SYMBOL_REF: 5493 base = XEXP (XEXP (x, 0), 1); 5494 output_addr_const (file, base); 5495 break; 5496 case CONST_INT: 5497 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 5498 break; 5499 default: 5500 gcc_unreachable (); 5501 } 5502 5503 /* How bogus. The compiler is apparently responsible for 5504 rounding the constant if it uses an LR field selector. 5505 5506 The linker and/or assembler seem a better place since 5507 they have to do this kind of thing already. 
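(The rounding below snaps the offset to the nearest multiple of 0x2000; e.g., a hypothetical offset of 0x2345 becomes 0x2000.)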
5508 5509 If we fail to do this, HP's optimizing linker may eliminate 5510 an addil, but not update the ldw/stw/ldo instruction that 5511 uses the result of the addil. */ 5512 if (round_constant) 5513 offset = ((offset + 0x1000) & ~0x1fff); 5514 5515 switch (GET_CODE (XEXP (x, 0))) 5516 { 5517 case PLUS: 5518 if (offset < 0) 5519 { 5520 offset = -offset; 5521 sep = "-"; 5522 } 5523 else 5524 sep = "+"; 5525 break; 5526 5527 case MINUS: 5528 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF); 5529 sep = "-"; 5530 break; 5531 5532 default: 5533 gcc_unreachable (); 5534 } 5535 5536 if (!read_only_operand (base, VOIDmode) && !flag_pic) 5537 fputs ("-$global$", file); 5538 if (offset) 5539 fprintf (file, "%s%d", sep, offset); 5540 } 5541 else 5542 output_addr_const (file, x); 5543} 5544 5545/* Output boilerplate text to appear at the beginning of the file. 5546 There are several possible versions. */ 5547#define aputs(x) fputs(x, asm_out_file) 5548static inline void 5549pa_file_start_level (void) 5550{ 5551 if (TARGET_64BIT) 5552 aputs ("\t.LEVEL 2.0w\n"); 5553 else if (TARGET_PA_20) 5554 aputs ("\t.LEVEL 2.0\n"); 5555 else if (TARGET_PA_11) 5556 aputs ("\t.LEVEL 1.1\n"); 5557 else 5558 aputs ("\t.LEVEL 1.0\n"); 5559} 5560 5561static inline void 5562pa_file_start_space (int sortspace) 5563{ 5564 aputs ("\t.SPACE $PRIVATE$"); 5565 if (sortspace) 5566 aputs (",SORT=16"); 5567 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"); 5568 if (flag_tm) 5569 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31"); 5570 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82" 5571 "\n\t.SPACE $TEXT$"); 5572 if (sortspace) 5573 aputs (",SORT=8"); 5574 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44" 5575 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n"); 5576} 5577 5578static inline void 5579pa_file_start_file (int want_version) 5580{ 5581 if (write_symbols != NO_DEBUG) 5582 { 5583 output_file_directive (asm_out_file, main_input_filename); 5584 if (want_version) 5585 aputs ("\t.version\t\"01.01\"\n"); 5586 } 5587} 5588 5589static inline void 5590pa_file_start_mcount (const char *aswhat) 5591{ 5592 if (profile_flag) 5593 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat); 5594} 5595 5596static void 5597pa_elf_file_start (void) 5598{ 5599 pa_file_start_level (); 5600 pa_file_start_mcount ("ENTRY"); 5601 pa_file_start_file (0); 5602} 5603 5604static void 5605pa_som_file_start (void) 5606{ 5607 pa_file_start_level (); 5608 pa_file_start_space (0); 5609 aputs ("\t.IMPORT $global$,DATA\n" 5610 "\t.IMPORT $$dyncall,MILLICODE\n"); 5611 pa_file_start_mcount ("CODE"); 5612 pa_file_start_file (0); 5613} 5614 5615static void 5616pa_linux_file_start (void) 5617{ 5618 pa_file_start_file (1); 5619 pa_file_start_level (); 5620 pa_file_start_mcount ("CODE"); 5621} 5622 5623static void 5624pa_hpux64_gas_file_start (void) 5625{ 5626 pa_file_start_level (); 5627#ifdef ASM_OUTPUT_TYPE_DIRECTIVE 5628 if (profile_flag) 5629 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function"); 5630#endif 5631 pa_file_start_file (1); 5632} 5633 5634static void 5635pa_hpux64_hpas_file_start (void) 5636{ 5637 pa_file_start_level (); 5638 pa_file_start_space (1); 5639 pa_file_start_mcount ("CODE"); 5640 pa_file_start_file (0); 5641} 5642#undef aputs 5643 5644/* Search the deferred plabel list for SYMBOL and return its internal 5645 label. If an entry for SYMBOL is not found, a new entry is created. 
*/ 5646 5647 rtx 5648 pa_get_deferred_plabel (rtx symbol) 5649 { 5650 const char *fname = XSTR (symbol, 0); 5651 size_t i; 5652 5653 /* See if we have already put this function on the list of deferred 5654 plabels. This list is generally small, so a linear search is not 5655 too ugly. If it proves too slow, replace it with something faster. */ 5656 for (i = 0; i < n_deferred_plabels; i++) 5657 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0) 5658 break; 5659 5660 /* If the deferred plabel list is empty, or this entry was not found 5661 on the list, create a new entry on the list. */ 5662 if (deferred_plabels == NULL || i == n_deferred_plabels) 5663 { 5664 tree id; 5665 5666 if (deferred_plabels == 0) 5667 deferred_plabels = ggc_alloc<deferred_plabel> (); 5668 else 5669 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel, 5670 deferred_plabels, 5671 n_deferred_plabels + 1); 5672 5673 i = n_deferred_plabels++; 5674 deferred_plabels[i].internal_label = gen_label_rtx (); 5675 deferred_plabels[i].symbol = symbol; 5676 5677 /* Gross. We have just implicitly taken the address of this 5678 function. Mark it in the same manner as assemble_name. */ 5679 id = maybe_get_identifier (targetm.strip_name_encoding (fname)); 5680 if (id) 5681 mark_referenced (id); 5682 } 5683 5684 return deferred_plabels[i].internal_label; 5685} 5686 5687static void 5688output_deferred_plabels (void) 5689{ 5690 size_t i; 5691 5692 /* If we have some deferred plabels, then we need to switch into the 5693 data or readonly data section, and align it to a 4 byte boundary 5694 before outputting the deferred plabels. */ 5695 if (n_deferred_plabels) 5696 { 5697 switch_to_section (flag_pic ? data_section : readonly_data_section); 5698 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 5699 } 5700 5701 /* Now output the deferred plabels. */ 5702 for (i = 0; i < n_deferred_plabels; i++) 5703 { 5704 targetm.asm_out.internal_label (asm_out_file, "L", 5705 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); 5706 assemble_integer (deferred_plabels[i].symbol, 5707 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1); 5708 } 5709} 5710 5711/* Initialize optabs to point to emulation routines.
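For HP-UX long-double targets this redirects TFmode arithmetic, comparisons and conversions to the _U_Q* quad-precision library routines.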
*/ 5712 5713static void 5714pa_init_libfuncs (void) 5715{ 5716 if (HPUX_LONG_DOUBLE_LIBRARY) 5717 { 5718 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 5719 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 5720 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 5721 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 5722 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin"); 5723 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 5724 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt"); 5725 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 5726 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 5727 5728 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 5729 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 5730 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 5731 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 5732 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 5733 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 5734 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord"); 5735 5736 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 5737 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 5738 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 5739 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 5740 5741 set_conv_libfunc (sfix_optab, SImode, TFmode, 5742 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl" 5743 : "_U_Qfcnvfxt_quad_to_sgl"); 5744 set_conv_libfunc (sfix_optab, DImode, TFmode, 5745 "_U_Qfcnvfxt_quad_to_dbl"); 5746 set_conv_libfunc (ufix_optab, SImode, TFmode, 5747 "_U_Qfcnvfxt_quad_to_usgl"); 5748 set_conv_libfunc (ufix_optab, DImode, TFmode, 5749 "_U_Qfcnvfxt_quad_to_udbl"); 5750 5751 set_conv_libfunc (sfloat_optab, TFmode, SImode, 5752 "_U_Qfcnvxf_sgl_to_quad"); 5753 set_conv_libfunc (sfloat_optab, TFmode, DImode, 5754 "_U_Qfcnvxf_dbl_to_quad"); 5755 set_conv_libfunc (ufloat_optab, TFmode, SImode, 5756 "_U_Qfcnvxf_usgl_to_quad"); 5757 set_conv_libfunc (ufloat_optab, TFmode, DImode, 5758 "_U_Qfcnvxf_udbl_to_quad"); 5759 } 5760 5761 if (TARGET_SYNC_LIBCALL) 5762 init_sync_libfuncs (8); 5763} 5764 5765/* HP's millicode routines mean something special to the assembler. 5766 Keep track of which ones we have used. */ 5767 5768enum millicodes { remI, remU, divI, divU, mulI, end1000 }; 5769static void import_milli (enum millicodes); 5770static char imported[(int) end1000]; 5771static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"}; 5772static const char import_string[] = ".IMPORT $$....,MILLICODE"; 5773#define MILLI_START 10 5774 5775static void 5776import_milli (enum millicodes code) 5777{ 5778 char str[sizeof (import_string)]; 5779 5780 if (!imported[(int) code]) 5781 { 5782 imported[(int) code] = 1; 5783 strcpy (str, import_string); 5784 strncpy (str + MILLI_START, milli_names[(int) code], 4); 5785 output_asm_insn (str, 0); 5786 } 5787} 5788 5789/* The register constraints have put the operands and return value in 5790 the proper registers. */ 5791 5792const char * 5793pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn) 5794{ 5795 import_milli (mulI); 5796 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI")); 5797} 5798 5799/* Emit the rtl for doing a division by a constant. */ 5800 5801/* Do magic division millicodes exist for this value? 
*/ 5802const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1}; 5803 5804/* We'll use an array to keep track of the magic millicodes and 5805 whether or not we've used them already. [n][0] is signed, [n][1] is 5806 unsigned. */ 5807 5808static int div_milli[16][2]; 5809 5810int 5811pa_emit_hpdiv_const (rtx *operands, int unsignedp) 5812{ 5813 if (GET_CODE (operands[2]) == CONST_INT 5814 && INTVAL (operands[2]) > 0 5815 && INTVAL (operands[2]) < 16 5816 && pa_magic_milli[INTVAL (operands[2])]) 5817 { 5818 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31); 5819 5820 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); 5821 emit 5822 (gen_rtx_PARALLEL 5823 (VOIDmode, 5824 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), 5825 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, 5826 SImode, 5827 gen_rtx_REG (SImode, 26), 5828 operands[2])), 5829 gen_rtx_CLOBBER (VOIDmode, operands[4]), 5830 gen_rtx_CLOBBER (VOIDmode, operands[3]), 5831 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), 5832 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), 5833 gen_rtx_CLOBBER (VOIDmode, ret)))); 5834 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); 5835 return 1; 5836 } 5837 return 0; 5838} 5839 5840const char * 5841pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn) 5842{ 5843 int divisor; 5844 5845 /* If the divisor is a constant, try to use one of the special 5846 opcodes .*/ 5847 if (GET_CODE (operands[0]) == CONST_INT) 5848 { 5849 static char buf[100]; 5850 divisor = INTVAL (operands[0]); 5851 if (!div_milli[divisor][unsignedp]) 5852 { 5853 div_milli[divisor][unsignedp] = 1; 5854 if (unsignedp) 5855 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); 5856 else 5857 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); 5858 } 5859 if (unsignedp) 5860 { 5861 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC, 5862 INTVAL (operands[0])); 5863 return pa_output_millicode_call (insn, 5864 gen_rtx_SYMBOL_REF (SImode, buf)); 5865 } 5866 else 5867 { 5868 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC, 5869 INTVAL (operands[0])); 5870 return pa_output_millicode_call (insn, 5871 gen_rtx_SYMBOL_REF (SImode, buf)); 5872 } 5873 } 5874 /* Divisor isn't a special constant. */ 5875 else 5876 { 5877 if (unsignedp) 5878 { 5879 import_milli (divU); 5880 return pa_output_millicode_call (insn, 5881 gen_rtx_SYMBOL_REF (SImode, "$$divU")); 5882 } 5883 else 5884 { 5885 import_milli (divI); 5886 return pa_output_millicode_call (insn, 5887 gen_rtx_SYMBOL_REF (SImode, "$$divI")); 5888 } 5889 } 5890} 5891 5892/* Output a $$rem millicode to do mod. */ 5893 5894const char * 5895pa_output_mod_insn (int unsignedp, rtx_insn *insn) 5896{ 5897 if (unsignedp) 5898 { 5899 import_milli (remU); 5900 return pa_output_millicode_call (insn, 5901 gen_rtx_SYMBOL_REF (SImode, "$$remU")); 5902 } 5903 else 5904 { 5905 import_milli (remI); 5906 return pa_output_millicode_call (insn, 5907 gen_rtx_SYMBOL_REF (SImode, "$$remI")); 5908 } 5909} 5910 5911void 5912pa_output_arg_descriptor (rtx_insn *call_insn) 5913{ 5914 const char *arg_regs[4]; 5915 machine_mode arg_mode; 5916 rtx link; 5917 int i, output_flag = 0; 5918 int regno; 5919 5920 /* We neither need nor want argument location descriptors for the 5921 64bit runtime environment or the ELF32 environment. 
*/ 5922 if (TARGET_64BIT || TARGET_ELF32) 5923 return; 5924 5925 for (i = 0; i < 4; i++) 5926 arg_regs[i] = 0; 5927 5928 /* Specify explicitly that no argument relocations should take place 5929 if using the portable runtime calling conventions. */ 5930 if (TARGET_PORTABLE_RUNTIME) 5931 { 5932 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", 5933 asm_out_file); 5934 return; 5935 } 5936 5937 gcc_assert (CALL_P (call_insn)); 5938 for (link = CALL_INSN_FUNCTION_USAGE (call_insn); 5939 link; link = XEXP (link, 1)) 5940 { 5941 rtx use = XEXP (link, 0); 5942 5943 if (! (GET_CODE (use) == USE 5944 && GET_CODE (XEXP (use, 0)) == REG 5945 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 5946 continue; 5947 5948 arg_mode = GET_MODE (XEXP (use, 0)); 5949 regno = REGNO (XEXP (use, 0)); 5950 if (regno >= 23 && regno <= 26) 5951 { 5952 arg_regs[26 - regno] = "GR"; 5953 if (arg_mode == DImode) 5954 arg_regs[25 - regno] = "GR"; 5955 } 5956 else if (regno >= 32 && regno <= 39) 5957 { 5958 if (arg_mode == SFmode) 5959 arg_regs[(regno - 32) / 2] = "FR"; 5960 else 5961 { 5962#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED 5963 arg_regs[(regno - 34) / 2] = "FR"; 5964 arg_regs[(regno - 34) / 2 + 1] = "FU"; 5965#else 5966 arg_regs[(regno - 34) / 2] = "FU"; 5967 arg_regs[(regno - 34) / 2 + 1] = "FR"; 5968#endif 5969 } 5970 } 5971 } 5972 fputs ("\t.CALL ", asm_out_file); 5973 for (i = 0; i < 4; i++) 5974 { 5975 if (arg_regs[i]) 5976 { 5977 if (output_flag++) 5978 fputc (',', asm_out_file); 5979 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); 5980 } 5981 } 5982 fputc ('\n', asm_out_file); 5983} 5984 5985/* Inform reload about cases where moving X with a mode MODE to or from 5986 a register in RCLASS requires an extra scratch or immediate register. 5987 Return the class needed for the immediate register. */ 5988 5989static reg_class_t 5990pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 5991 machine_mode mode, secondary_reload_info *sri) 5992{ 5993 int regno; 5994 enum reg_class rclass = (enum reg_class) rclass_i; 5995 5996 /* Handle the easy stuff first. */ 5997 if (rclass == R1_REGS) 5998 return NO_REGS; 5999 6000 if (REG_P (x)) 6001 { 6002 regno = REGNO (x); 6003 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER) 6004 return NO_REGS; 6005 } 6006 else 6007 regno = -1; 6008 6009 /* If we have something like (mem (mem (...)), we can safely assume the 6010 inner MEM will end up in a general register after reloading, so there's 6011 no need for a secondary reload. */ 6012 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM) 6013 return NO_REGS; 6014 6015 /* Trying to load a constant into a FP register during PIC code 6016 generation requires %r1 as a scratch register. For float modes, 6017 the only legitimate constant is CONST0_RTX. However, there are 6018 a few patterns that accept constant double operands. 
*/ 6019 if (flag_pic 6020 && FP_REG_CLASS_P (rclass) 6021 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)) 6022 { 6023 switch (mode) 6024 { 6025 case SImode: 6026 sri->icode = CODE_FOR_reload_insi_r1; 6027 break; 6028 6029 case DImode: 6030 sri->icode = CODE_FOR_reload_indi_r1; 6031 break; 6032 6033 case SFmode: 6034 sri->icode = CODE_FOR_reload_insf_r1; 6035 break; 6036 6037 case DFmode: 6038 sri->icode = CODE_FOR_reload_indf_r1; 6039 break; 6040 6041 default: 6042 gcc_unreachable (); 6043 } 6044 return NO_REGS; 6045 } 6046 6047 /* Secondary reloads of symbolic expressions require %r1 as a scratch 6048 register when we're generating PIC code or when the operand isn't 6049 readonly. */ 6050 if (pa_symbolic_expression_p (x)) 6051 { 6052 if (GET_CODE (x) == HIGH) 6053 x = XEXP (x, 0); 6054 6055 if (flag_pic || !read_only_operand (x, VOIDmode)) 6056 { 6057 switch (mode) 6058 { 6059 case SImode: 6060 sri->icode = CODE_FOR_reload_insi_r1; 6061 break; 6062 6063 case DImode: 6064 sri->icode = CODE_FOR_reload_indi_r1; 6065 break; 6066 6067 default: 6068 gcc_unreachable (); 6069 } 6070 return NO_REGS; 6071 } 6072 } 6073 6074 /* Profiling showed the PA port spends about 1.3% of its compilation 6075 time in true_regnum from calls inside pa_secondary_reload_class. */ 6076 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 6077 regno = true_regnum (x); 6078 6079 /* Handle reloads for floating point loads and stores. */ 6080 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1) 6081 && FP_REG_CLASS_P (rclass)) 6082 { 6083 if (MEM_P (x)) 6084 { 6085 x = XEXP (x, 0); 6086 6087 /* We don't need a secondary reload for indexed memory addresses. 6088 6089 When INT14_OK_STRICT is true, it might appear that we could 6090 directly allow register indirect memory addresses. However, 6091 this doesn't work because we don't support SUBREGs in 6092 floating-point register copies and reload doesn't tell us 6093 when it's going to use a SUBREG. */ 6094 if (IS_INDEX_ADDR_P (x)) 6095 return NO_REGS; 6096 } 6097 6098 /* Request a secondary reload with a general scratch register 6099 for everything else. ??? Could symbolic operands be handled 6100 directly when generating non-pic PA 2.0 code? */ 6101 sri->icode = (in_p 6102 ? direct_optab_handler (reload_in_optab, mode) 6103 : direct_optab_handler (reload_out_optab, mode)); 6104 return NO_REGS; 6105 } 6106 6107 /* A SAR<->FP register copy requires an intermediate general register 6108 and secondary memory. We need a secondary reload with a general 6109 scratch register for spills. */ 6110 if (rclass == SHIFT_REGS) 6111 { 6112 /* Handle spill. */ 6113 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0) 6114 { 6115 sri->icode = (in_p 6116 ? direct_optab_handler (reload_in_optab, mode) 6117 : direct_optab_handler (reload_out_optab, mode)); 6118 return NO_REGS; 6119 } 6120 6121 /* Handle FP copy. */ 6122 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno))) 6123 return GENERAL_REGS; 6124 } 6125 6126 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER 6127 && REGNO_REG_CLASS (regno) == SHIFT_REGS 6128 && FP_REG_CLASS_P (rclass)) 6129 return GENERAL_REGS; 6130 6131 return NO_REGS; 6132} 6133 6134/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer 6135 is only marked as live on entry by df-scan when it is a fixed 6136 register. It isn't a fixed register in the 64-bit runtime, 6137 so we need to mark it here. 
*/ 6138 6139static void 6140pa_extra_live_on_entry (bitmap regs) 6141{ 6142 if (TARGET_64BIT) 6143 bitmap_set_bit (regs, ARG_POINTER_REGNUM); 6144} 6145 6146/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile 6147 to prevent it from being deleted. */ 6148 6149rtx 6150pa_eh_return_handler_rtx (void) 6151{ 6152 rtx tmp; 6153 6154 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx, 6155 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20)); 6156 tmp = gen_rtx_MEM (word_mode, tmp); 6157 tmp->volatil = 1; 6158 return tmp; 6159} 6160 6161/* In the 32-bit runtime, arguments larger than eight bytes are passed 6162 by invisible reference. As a GCC extension, we also pass anything 6163 with a zero or variable size by reference. 6164 6165 The 64-bit runtime does not describe passing any types by invisible 6166 reference. The internals of GCC can't currently handle passing 6167 empty structures, and zero or variable length arrays when they are 6168 not passed entirely on the stack or by reference. Thus, as a GCC 6169 extension, we pass these types by reference. The HP compiler doesn't 6170 support these types, so hopefully there shouldn't be any compatibility 6171 issues. This may have to be revisited when HP releases a C99 compiler 6172 or updates the ABI. */ 6173 6174static bool 6175pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, 6176 machine_mode mode, const_tree type, 6177 bool named ATTRIBUTE_UNUSED) 6178{ 6179 HOST_WIDE_INT size; 6180 6181 if (type) 6182 size = int_size_in_bytes (type); 6183 else 6184 size = GET_MODE_SIZE (mode); 6185 6186 if (TARGET_64BIT) 6187 return size <= 0; 6188 else 6189 return size <= 0 || size > 8; 6190} 6191 6192enum direction 6193pa_function_arg_padding (machine_mode mode, const_tree type) 6194{ 6195 if (mode == BLKmode 6196 || (TARGET_64BIT 6197 && type 6198 && (AGGREGATE_TYPE_P (type) 6199 || TREE_CODE (type) == COMPLEX_TYPE 6200 || TREE_CODE (type) == VECTOR_TYPE))) 6201 { 6202 /* Return none if justification is not required. */ 6203 if (type 6204 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 6205 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0) 6206 return none; 6207 6208 /* The directions set here are ignored when a BLKmode argument larger 6209 than a word is placed in a register. Different code is used for 6210 the stack and registers. This makes it difficult to have a 6211 consistent data representation for both the stack and registers. 6212 For both runtimes, the justification and padding for arguments on 6213 the stack and in registers should be identical. */ 6214 if (TARGET_64BIT) 6215 /* The 64-bit runtime specifies left justification for aggregates. */ 6216 return upward; 6217 else 6218 /* The 32-bit runtime architecture specifies right justification. 6219 When the argument is passed on the stack, the argument is padded 6220 with garbage on the left. The HP compiler pads with zeros. */ 6221 return downward; 6222 } 6223 6224 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY) 6225 return downward; 6226 else 6227 return none; 6228} 6229 6230 6231/* Do what is necessary for `va_start'. We look at the current function 6232 to determine if stdargs or varargs is used and fill in an initial 6233 va_list. A pointer to this constructor is returned. */ 6234 6235static rtx 6236hppa_builtin_saveregs (void) 6237{ 6238 rtx offset, dest; 6239 tree fntype = TREE_TYPE (current_function_decl); 6240 int argadj = ((!stdarg_p (fntype)) 6241 ? 
UNITS_PER_WORD : 0); 6242 6243 if (argadj) 6244 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj); 6245 else 6246 offset = crtl->args.arg_offset_rtx; 6247 6248 if (TARGET_64BIT) 6249 { 6250 int i, off; 6251 6252 /* Adjust for varargs/stdarg differences. */ 6253 if (argadj) 6254 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj); 6255 else 6256 offset = crtl->args.arg_offset_rtx; 6257 6258 /* We need to save %r26 .. %r19 inclusive starting at offset -64 6259 from the incoming arg pointer and growing to larger addresses. */ 6260 for (i = 26, off = -64; i >= 19; i--, off += 8) 6261 emit_move_insn (gen_rtx_MEM (word_mode, 6262 plus_constant (Pmode, 6263 arg_pointer_rtx, off)), 6264 gen_rtx_REG (word_mode, i)); 6265 6266 /* The incoming args pointer points just beyond the flushback area; 6267 normally this is not a serious concern. However, when we are doing 6268 varargs/stdargs we want to make the arg pointer point to the start 6269 of the incoming argument area. */ 6270 emit_move_insn (virtual_incoming_args_rtx, 6271 plus_constant (Pmode, arg_pointer_rtx, -64)); 6272 6273 /* Now return a pointer to the first anonymous argument. */ 6274 return copy_to_reg (expand_binop (Pmode, add_optab, 6275 virtual_incoming_args_rtx, 6276 offset, 0, 0, OPTAB_LIB_WIDEN)); 6277 } 6278 6279 /* Store general registers on the stack. */ 6280 dest = gen_rtx_MEM (BLKmode, 6281 plus_constant (Pmode, crtl->args.internal_arg_pointer, 6282 -16)); 6283 set_mem_alias_set (dest, get_varargs_alias_set ()); 6284 set_mem_align (dest, BITS_PER_WORD); 6285 move_block_from_reg (23, dest, 4); 6286 6287 /* move_block_from_reg will emit code to store the argument registers 6288 individually as scalar stores. 6289 6290 However, other insns may later load from the same addresses for 6291 a structure load (passing a struct to a varargs routine). 6292 6293 The alias code assumes that such aliasing can never happen, so we 6294 have to keep memory referencing insns from moving up beyond the 6295 last argument register store. So we emit a blockage insn here. */ 6296 emit_insn (gen_blockage ()); 6297 6298 return copy_to_reg (expand_binop (Pmode, add_optab, 6299 crtl->args.internal_arg_pointer, 6300 offset, 0, 0, OPTAB_LIB_WIDEN)); 6301} 6302 6303static void 6304hppa_va_start (tree valist, rtx nextarg) 6305{ 6306 nextarg = expand_builtin_saveregs (); 6307 std_expand_builtin_va_start (valist, nextarg); 6308} 6309 6310static tree 6311hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 6312 gimple_seq *post_p) 6313{ 6314 if (TARGET_64BIT) 6315 { 6316 /* Args grow upward. We can use the generic routines. */ 6317 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6318 } 6319 else /* !TARGET_64BIT */ 6320 { 6321 tree ptr = build_pointer_type (type); 6322 tree valist_type; 6323 tree t, u; 6324 unsigned int size, ofs; 6325 bool indirect; 6326 6327 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); 6328 if (indirect) 6329 { 6330 type = ptr; 6331 ptr = build_pointer_type (type); 6332 } 6333 size = int_size_in_bytes (type); 6334 valist_type = TREE_TYPE (valist); 6335 6336 /* Args grow down. Not handled by generic routines. */ 6337 6338 u = fold_convert (sizetype, size_in_bytes (type)); 6339 u = fold_build1 (NEGATE_EXPR, sizetype, u); 6340 t = fold_build_pointer_plus (valist, u); 6341 6342 /* Align to 4 or 8 byte boundary depending on argument size. */ 6343 6344 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? 
-8 : -4)); 6345 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u); 6346 t = fold_convert (valist_type, t); 6347 6348 t = build2 (MODIFY_EXPR, valist_type, valist, t); 6349 6350 ofs = (8 - size) % 4; 6351 if (ofs != 0) 6352 t = fold_build_pointer_plus_hwi (t, ofs); 6353 6354 t = fold_convert (ptr, t); 6355 t = build_va_arg_indirect_ref (t); 6356 6357 if (indirect) 6358 t = build_va_arg_indirect_ref (t); 6359 6360 return t; 6361 } 6362} 6363 6364/* True if MODE is valid for the target. By "valid", we mean able to 6365 be manipulated in non-trivial ways. In particular, this means all 6366 the arithmetic is supported. 6367 6368 Currently, TImode is not valid as the HP 64-bit runtime documentation 6369 doesn't document the alignment and calling conventions for this type. 6370 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and 6371 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */ 6372 6373static bool 6374pa_scalar_mode_supported_p (machine_mode mode) 6375{ 6376 int precision = GET_MODE_PRECISION (mode); 6377 6378 switch (GET_MODE_CLASS (mode)) 6379 { 6380 case MODE_PARTIAL_INT: 6381 case MODE_INT: 6382 if (precision == CHAR_TYPE_SIZE) 6383 return true; 6384 if (precision == SHORT_TYPE_SIZE) 6385 return true; 6386 if (precision == INT_TYPE_SIZE) 6387 return true; 6388 if (precision == LONG_TYPE_SIZE) 6389 return true; 6390 if (precision == LONG_LONG_TYPE_SIZE) 6391 return true; 6392 return false; 6393 6394 case MODE_FLOAT: 6395 if (precision == FLOAT_TYPE_SIZE) 6396 return true; 6397 if (precision == DOUBLE_TYPE_SIZE) 6398 return true; 6399 if (precision == LONG_DOUBLE_TYPE_SIZE) 6400 return true; 6401 return false; 6402 6403 case MODE_DECIMAL_FLOAT: 6404 return false; 6405 6406 default: 6407 gcc_unreachable (); 6408 } 6409} 6410 6411/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and 6412 it branches into the delay slot. Otherwise, return FALSE. */ 6413 6414static bool 6415branch_to_delay_slot_p (rtx_insn *insn) 6416{ 6417 rtx_insn *jump_insn; 6418 6419 if (dbr_sequence_length ()) 6420 return FALSE; 6421 6422 jump_insn = next_active_insn (JUMP_LABEL (insn)); 6423 while (insn) 6424 { 6425 insn = next_active_insn (insn); 6426 if (jump_insn == insn) 6427 return TRUE; 6428 6429 /* We can't rely on the length of asms. So, we return FALSE when 6430 the branch is followed by an asm. */ 6431 if (!insn 6432 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6433 || extract_asm_operands (PATTERN (insn)) != NULL_RTX 6434 || get_attr_length (insn) > 0) 6435 break; 6436 } 6437 6438 return FALSE; 6439} 6440 6441/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot. 6442 6443 This occurs when INSN has an unfilled delay slot and is followed 6444 by an asm. Disaster can occur if the asm is empty and the jump 6445 branches into the delay slot. So, we add a nop in the delay slot 6446 when this occurs. */ 6447 6448static bool 6449branch_needs_nop_p (rtx_insn *insn) 6450{ 6451 rtx_insn *jump_insn; 6452 6453 if (dbr_sequence_length ()) 6454 return FALSE; 6455 6456 jump_insn = next_active_insn (JUMP_LABEL (insn)); 6457 while (insn) 6458 { 6459 insn = next_active_insn (insn); 6460 if (!insn || jump_insn == insn) 6461 return TRUE; 6462 6463 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT 6464 || extract_asm_operands (PATTERN (insn)) != NULL_RTX) 6465 && get_attr_length (insn) > 0) 6466 break; 6467 } 6468 6469 return FALSE; 6470} 6471 6472/* Return TRUE if INSN, a forward jump insn, can use nullification 6473 to skip the following instruction. 
This avoids an extra cycle due 6474 to a mis-predicted branch when we fall through. */ 6475 6476static bool 6477use_skip_p (rtx_insn *insn) 6478{ 6479 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn)); 6480 6481 while (insn) 6482 { 6483 insn = next_active_insn (insn); 6484 6485 /* We can't rely on the length of asms, so we can't skip asms. */ 6486 if (!insn 6487 || GET_CODE (PATTERN (insn)) == ASM_INPUT 6488 || extract_asm_operands (PATTERN (insn)) != NULL_RTX) 6489 break; 6490 if (get_attr_length (insn) == 4 6491 && jump_insn == next_active_insn (insn)) 6492 return TRUE; 6493 if (get_attr_length (insn) > 0) 6494 break; 6495 } 6496 6497 return FALSE; 6498} 6499 6500/* This routine handles all the normal conditional branch sequences we 6501 might need to generate. It handles compare immediate vs compare 6502 register, nullification of delay slots, varying length branches, 6503 negated branches, and all combinations of the above. It returns the 6504 output appropriate to emit the branch corresponding to all given 6505 parameters. */ 6506 6507const char * 6508pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn) 6509{ 6510 static char buf[100]; 6511 bool useskip; 6512 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6513 int length = get_attr_length (insn); 6514 int xdelay; 6515 6516 /* A conditional branch to the following instruction (e.g. the delay slot) 6517 is asking for a disaster. This can happen when not optimizing and 6518 when jump optimization fails. 6519 6520 While it is usually safe to emit nothing, this can fail if the 6521 preceding instruction is a nullified branch with an empty delay 6522 slot and the same branch target as this branch. We could check 6523 for this but jump optimization should eliminate nop jumps. It 6524 is always safe to emit a nop. */ 6525 if (branch_to_delay_slot_p (insn)) 6526 return "nop"; 6527 6528 /* The doubleword form of the cmpib instruction doesn't have the LEU 6529 and GTU conditions while the cmpb instruction does. Since we accept 6530 zero for cmpb, we must ensure that we use cmpb for the comparison. */ 6531 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx) 6532 operands[2] = gen_rtx_REG (DImode, 0); 6533 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx) 6534 operands[1] = gen_rtx_REG (DImode, 0); 6535 6536 /* If this is a long branch with its delay slot unfilled, set `nullify' 6537 as it can nullify the delay slot and save a nop. */ 6538 if (length == 8 && dbr_sequence_length () == 0) 6539 nullify = 1; 6540 6541 /* If this is a short forward conditional branch which did not get 6542 its delay slot filled, the delay slot can still be nullified. */ 6543 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6544 nullify = forward_branch_p (insn); 6545 6546 /* A forward branch over a single nullified insn can be done with a 6547 comclr instruction. This avoids a single cycle penalty due to 6548 mis-predicted branch if we fall through (branch not taken). */ 6549 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6550 6551 switch (length) 6552 { 6553 /* All short conditional branches except backwards with an unfilled 6554 delay slot. 
*/ 6555 case 4: 6556 if (useskip) 6557 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6558 else 6559 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6560 if (GET_MODE (operands[1]) == DImode) 6561 strcat (buf, "*"); 6562 if (negated) 6563 strcat (buf, "%B3"); 6564 else 6565 strcat (buf, "%S3"); 6566 if (useskip) 6567 strcat (buf, " %2,%r1,%%r0"); 6568 else if (nullify) 6569 { 6570 if (branch_needs_nop_p (insn)) 6571 strcat (buf, ",n %2,%r1,%0%#"); 6572 else 6573 strcat (buf, ",n %2,%r1,%0"); 6574 } 6575 else 6576 strcat (buf, " %2,%r1,%0"); 6577 break; 6578 6579 /* All long conditionals. Note a short backward branch with an 6580 unfilled delay slot is treated just like a long backward branch 6581 with an unfilled delay slot. */ 6582 case 8: 6583 /* Handle weird backwards branch with a filled delay slot 6584 which is nullified. */ 6585 if (dbr_sequence_length () != 0 6586 && ! forward_branch_p (insn) 6587 && nullify) 6588 { 6589 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6590 if (GET_MODE (operands[1]) == DImode) 6591 strcat (buf, "*"); 6592 if (negated) 6593 strcat (buf, "%S3"); 6594 else 6595 strcat (buf, "%B3"); 6596 strcat (buf, ",n %2,%r1,.+12\n\tb %0"); 6597 } 6598 /* Handle short backwards branch with an unfilled delay slot. 6599 Using a comb;nop rather than comiclr;bl saves 1 cycle for both 6600 taken and untaken branches. */ 6601 else if (dbr_sequence_length () == 0 6602 && ! forward_branch_p (insn) 6603 && INSN_ADDRESSES_SET_P () 6604 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6605 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6606 { 6607 strcpy (buf, "{com%I2b,|cmp%I2b,}"); 6608 if (GET_MODE (operands[1]) == DImode) 6609 strcat (buf, "*"); 6610 if (negated) 6611 strcat (buf, "%B3 %2,%r1,%0%#"); 6612 else 6613 strcat (buf, "%S3 %2,%r1,%0%#"); 6614 } 6615 else 6616 { 6617 strcpy (buf, "{com%I2clr,|cmp%I2clr,}"); 6618 if (GET_MODE (operands[1]) == DImode) 6619 strcat (buf, "*"); 6620 if (negated) 6621 strcat (buf, "%S3"); 6622 else 6623 strcat (buf, "%B3"); 6624 if (nullify) 6625 strcat (buf, " %2,%r1,%%r0\n\tb,n %0"); 6626 else 6627 strcat (buf, " %2,%r1,%%r0\n\tb %0"); 6628 } 6629 break; 6630 6631 default: 6632 /* The reversed conditional branch must branch over one additional 6633 instruction if the delay slot is filled and needs to be extracted 6634 by pa_output_lbranch. If the delay slot is empty or this is a 6635 nullified forward branch, the instruction after the reversed 6636 condition branch must be nullified. */ 6637 if (dbr_sequence_length () == 0 6638 || (nullify && forward_branch_p (insn))) 6639 { 6640 nullify = 1; 6641 xdelay = 0; 6642 operands[4] = GEN_INT (length); 6643 } 6644 else 6645 { 6646 xdelay = 1; 6647 operands[4] = GEN_INT (length + 4); 6648 } 6649 6650 /* Create a reversed conditional branch which branches around 6651 the following insns. 
*/ 6652 if (GET_MODE (operands[1]) != DImode) 6653 { 6654 if (nullify) 6655 { 6656 if (negated) 6657 strcpy (buf, 6658 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}"); 6659 else 6660 strcpy (buf, 6661 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}"); 6662 } 6663 else 6664 { 6665 if (negated) 6666 strcpy (buf, 6667 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}"); 6668 else 6669 strcpy (buf, 6670 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}"); 6671 } 6672 } 6673 else 6674 { 6675 if (nullify) 6676 { 6677 if (negated) 6678 strcpy (buf, 6679 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}"); 6680 else 6681 strcpy (buf, 6682 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}"); 6683 } 6684 else 6685 { 6686 if (negated) 6687 strcpy (buf, 6688 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}"); 6689 else 6690 strcpy (buf, 6691 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}"); 6692 } 6693 } 6694 6695 output_asm_insn (buf, operands); 6696 return pa_output_lbranch (operands[0], insn, xdelay); 6697 } 6698 return buf; 6699} 6700 6701/* This routine handles output of long unconditional branches that 6702 exceed the maximum range of a simple branch instruction. Since 6703 we don't have a register available for the branch, we save register 6704 %r1 in the frame marker, load the branch destination DEST into %r1, 6705 execute the branch, and restore %r1 in the delay slot of the branch. 6706 6707 Since long branches may have an insn in the delay slot and the 6708 delay slot is used to restore %r1, we in general need to extract 6709 this insn and execute it before the branch. However, to facilitate 6710 use of this function by conditional branches, we also provide an 6711 option to not extract the delay insn so that it will be emitted 6712 after the long branch. So, if there is an insn in the delay slot, 6713 it is extracted if XDELAY is nonzero. 6714 6715 The lengths of the various long-branch sequences are 20, 16 and 24 6716 bytes for the portable runtime, non-PIC and PIC cases, respectively. */ 6717 6718const char * 6719pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay) 6720{ 6721 rtx xoperands[2]; 6722 6723 xoperands[0] = dest; 6724 6725 /* First, free up the delay slot. */ 6726 if (xdelay && dbr_sequence_length () != 0) 6727 { 6728 /* We can't handle a jump in the delay slot. */ 6729 gcc_assert (! JUMP_P (NEXT_INSN (insn))); 6730 6731 final_scan_insn (NEXT_INSN (insn), asm_out_file, 6732 optimize, 0, NULL); 6733 6734 /* Now delete the delay insn. */ 6735 SET_INSN_DELETED (NEXT_INSN (insn)); 6736 } 6737 6738 /* Output an insn to save %r1. The runtime documentation doesn't 6739 specify whether the "Clean Up" slot in the callers frame can 6740 be clobbered by the callee. It isn't copied by HP's builtin 6741 alloca, so this suggests that it can be clobbered if necessary. 6742 The "Static Link" location is copied by HP builtin alloca, so 6743 we avoid using it. Using the cleanup slot might be a problem 6744 if we have to interoperate with languages that pass cleanup 6745 information. However, it should be possible to handle these 6746 situations with GCC's asm feature. 6747 6748 The "Current RP" slot is reserved for the called procedure, so 6749 we try to use it when we don't have a frame of our own. It's 6750 rather unlikely that we won't have a frame when we need to emit 6751 a very long branch. 
6752 6753 Really the way to go long term is a register scavenger; goto 6754 the target of the jump and find a register which we can use 6755 as a scratch to hold the value in %r1. Then, we wouldn't have 6756 to free up the delay slot or clobber a slot that may be needed 6757 for other purposes. */ 6758 if (TARGET_64BIT) 6759 { 6760 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6761 /* Use the return pointer slot in the frame marker. */ 6762 output_asm_insn ("std %%r1,-16(%%r30)", xoperands); 6763 else 6764 /* Use the slot at -40 in the frame marker since HP builtin 6765 alloca doesn't copy it. */ 6766 output_asm_insn ("std %%r1,-40(%%r30)", xoperands); 6767 } 6768 else 6769 { 6770 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6771 /* Use the return pointer slot in the frame marker. */ 6772 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands); 6773 else 6774 /* Use the "Clean Up" slot in the frame marker. In GCC, 6775 the only other use of this location is for copying a 6776 floating point double argument from a floating-point 6777 register to two general registers. The copy is done 6778 as an "atomic" operation when outputting a call, so it 6779 won't interfere with our using the location here. */ 6780 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands); 6781 } 6782 6783 if (TARGET_PORTABLE_RUNTIME) 6784 { 6785 output_asm_insn ("ldil L'%0,%%r1", xoperands); 6786 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 6787 output_asm_insn ("bv %%r0(%%r1)", xoperands); 6788 } 6789 else if (flag_pic) 6790 { 6791 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 6792 if (TARGET_SOM || !TARGET_GAS) 6793 { 6794 xoperands[1] = gen_label_rtx (); 6795 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands); 6796 targetm.asm_out.internal_label (asm_out_file, "L", 6797 CODE_LABEL_NUMBER (xoperands[1])); 6798 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands); 6799 } 6800 else 6801 { 6802 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands); 6803 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 6804 } 6805 output_asm_insn ("bv %%r0(%%r1)", xoperands); 6806 } 6807 else 6808 /* Now output a very long branch to the original target. */ 6809 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands); 6810 6811 /* Now restore the value of %r1 in the delay slot. */ 6812 if (TARGET_64BIT) 6813 { 6814 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6815 return "ldd -16(%%r30),%%r1"; 6816 else 6817 return "ldd -40(%%r30),%%r1"; 6818 } 6819 else 6820 { 6821 if (actual_fsize == 0 && !df_regs_ever_live_p (2)) 6822 return "ldw -20(%%r30),%%r1"; 6823 else 6824 return "ldw -12(%%r30),%%r1"; 6825 } 6826} 6827 6828/* This routine handles all the branch-on-bit conditional branch sequences we 6829 might need to generate. It handles nullification of delay slots, 6830 varying length branches, negated branches and all combinations of the 6831 above. it returns the appropriate output template to emit the branch. */ 6832 6833const char * 6834pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which) 6835{ 6836 static char buf[100]; 6837 bool useskip; 6838 int nullify = INSN_ANNULLED_BRANCH_P (insn); 6839 int length = get_attr_length (insn); 6840 int xdelay; 6841 6842 /* A conditional branch to the following instruction (e.g. the delay slot) is 6843 asking for a disaster. I do not think this can happen as this pattern 6844 is only used when optimizing; jump optimization should eliminate the 6845 jump. But be prepared just in case. 
*/ 6846 6847 if (branch_to_delay_slot_p (insn)) 6848 return "nop"; 6849 6850 /* If this is a long branch with its delay slot unfilled, set `nullify' 6851 as it can nullify the delay slot and save a nop. */ 6852 if (length == 8 && dbr_sequence_length () == 0) 6853 nullify = 1; 6854 6855 /* If this is a short forward conditional branch which did not get 6856 its delay slot filled, the delay slot can still be nullified. */ 6857 if (! nullify && length == 4 && dbr_sequence_length () == 0) 6858 nullify = forward_branch_p (insn); 6859 6860 /* A forward branch over a single nullified insn can be done with a 6861 extrs instruction. This avoids a single cycle penalty due to 6862 mis-predicted branch if we fall through (branch not taken). */ 6863 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 6864 6865 switch (length) 6866 { 6867 6868 /* All short conditional branches except backwards with an unfilled 6869 delay slot. */ 6870 case 4: 6871 if (useskip) 6872 strcpy (buf, "{extrs,|extrw,s,}"); 6873 else 6874 strcpy (buf, "bb,"); 6875 if (useskip && GET_MODE (operands[0]) == DImode) 6876 strcpy (buf, "extrd,s,*"); 6877 else if (GET_MODE (operands[0]) == DImode) 6878 strcpy (buf, "bb,*"); 6879 if ((which == 0 && negated) 6880 || (which == 1 && ! negated)) 6881 strcat (buf, ">="); 6882 else 6883 strcat (buf, "<"); 6884 if (useskip) 6885 strcat (buf, " %0,%1,1,%%r0"); 6886 else if (nullify && negated) 6887 { 6888 if (branch_needs_nop_p (insn)) 6889 strcat (buf, ",n %0,%1,%3%#"); 6890 else 6891 strcat (buf, ",n %0,%1,%3"); 6892 } 6893 else if (nullify && ! negated) 6894 { 6895 if (branch_needs_nop_p (insn)) 6896 strcat (buf, ",n %0,%1,%2%#"); 6897 else 6898 strcat (buf, ",n %0,%1,%2"); 6899 } 6900 else if (! nullify && negated) 6901 strcat (buf, " %0,%1,%3"); 6902 else if (! nullify && ! negated) 6903 strcat (buf, " %0,%1,%2"); 6904 break; 6905 6906 /* All long conditionals. Note a short backward branch with an 6907 unfilled delay slot is treated just like a long backward branch 6908 with an unfilled delay slot. */ 6909 case 8: 6910 /* Handle weird backwards branch with a filled delay slot 6911 which is nullified. */ 6912 if (dbr_sequence_length () != 0 6913 && ! forward_branch_p (insn) 6914 && nullify) 6915 { 6916 strcpy (buf, "bb,"); 6917 if (GET_MODE (operands[0]) == DImode) 6918 strcat (buf, "*"); 6919 if ((which == 0 && negated) 6920 || (which == 1 && ! negated)) 6921 strcat (buf, "<"); 6922 else 6923 strcat (buf, ">="); 6924 if (negated) 6925 strcat (buf, ",n %0,%1,.+12\n\tb %3"); 6926 else 6927 strcat (buf, ",n %0,%1,.+12\n\tb %2"); 6928 } 6929 /* Handle short backwards branch with an unfilled delay slot. 6930 Using a bb;nop rather than extrs;bl saves 1 cycle for both 6931 taken and untaken branches. */ 6932 else if (dbr_sequence_length () == 0 6933 && ! forward_branch_p (insn) 6934 && INSN_ADDRESSES_SET_P () 6935 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 6936 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 6937 { 6938 strcpy (buf, "bb,"); 6939 if (GET_MODE (operands[0]) == DImode) 6940 strcat (buf, "*"); 6941 if ((which == 0 && negated) 6942 || (which == 1 && ! negated)) 6943 strcat (buf, ">="); 6944 else 6945 strcat (buf, "<"); 6946 if (negated) 6947 strcat (buf, " %0,%1,%3%#"); 6948 else 6949 strcat (buf, " %0,%1,%2%#"); 6950 } 6951 else 6952 { 6953 if (GET_MODE (operands[0]) == DImode) 6954 strcpy (buf, "extrd,s,*"); 6955 else 6956 strcpy (buf, "{extrs,|extrw,s,}"); 6957 if ((which == 0 && negated) 6958 || (which == 1 && ! 
negated)) 6959 strcat (buf, "<"); 6960 else 6961 strcat (buf, ">="); 6962 if (nullify && negated) 6963 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3"); 6964 else if (nullify && ! negated) 6965 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2"); 6966 else if (negated) 6967 strcat (buf, " %0,%1,1,%%r0\n\tb %3"); 6968 else 6969 strcat (buf, " %0,%1,1,%%r0\n\tb %2"); 6970 } 6971 break; 6972 6973 default: 6974 /* The reversed conditional branch must branch over one additional 6975 instruction if the delay slot is filled and needs to be extracted 6976 by pa_output_lbranch. If the delay slot is empty or this is a 6977 nullified forward branch, the instruction after the reversed 6978 condition branch must be nullified. */ 6979 if (dbr_sequence_length () == 0 6980 || (nullify && forward_branch_p (insn))) 6981 { 6982 nullify = 1; 6983 xdelay = 0; 6984 operands[4] = GEN_INT (length); 6985 } 6986 else 6987 { 6988 xdelay = 1; 6989 operands[4] = GEN_INT (length + 4); 6990 } 6991 6992 if (GET_MODE (operands[0]) == DImode) 6993 strcpy (buf, "bb,*"); 6994 else 6995 strcpy (buf, "bb,"); 6996 if ((which == 0 && negated) 6997 || (which == 1 && !negated)) 6998 strcat (buf, "<"); 6999 else 7000 strcat (buf, ">="); 7001 if (nullify) 7002 strcat (buf, ",n %0,%1,.+%4"); 7003 else 7004 strcat (buf, " %0,%1,.+%4"); 7005 output_asm_insn (buf, operands); 7006 return pa_output_lbranch (negated ? operands[3] : operands[2], 7007 insn, xdelay); 7008 } 7009 return buf; 7010} 7011 7012/* This routine handles all the branch-on-variable-bit conditional branch 7013 sequences we might need to generate. It handles nullification of delay 7014 slots, varying length branches, negated branches and all combinations 7015 of the above. it returns the appropriate output template to emit the 7016 branch. */ 7017 7018const char * 7019pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, 7020 int which) 7021{ 7022 static char buf[100]; 7023 bool useskip; 7024 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7025 int length = get_attr_length (insn); 7026 int xdelay; 7027 7028 /* A conditional branch to the following instruction (e.g. the delay slot) is 7029 asking for a disaster. I do not think this can happen as this pattern 7030 is only used when optimizing; jump optimization should eliminate the 7031 jump. But be prepared just in case. */ 7032 7033 if (branch_to_delay_slot_p (insn)) 7034 return "nop"; 7035 7036 /* If this is a long branch with its delay slot unfilled, set `nullify' 7037 as it can nullify the delay slot and save a nop. */ 7038 if (length == 8 && dbr_sequence_length () == 0) 7039 nullify = 1; 7040 7041 /* If this is a short forward conditional branch which did not get 7042 its delay slot filled, the delay slot can still be nullified. */ 7043 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7044 nullify = forward_branch_p (insn); 7045 7046 /* A forward branch over a single nullified insn can be done with a 7047 extrs instruction. This avoids a single cycle penalty due to 7048 mis-predicted branch if we fall through (branch not taken). */ 7049 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE; 7050 7051 switch (length) 7052 { 7053 7054 /* All short conditional branches except backwards with an unfilled 7055 delay slot. 
*/ 7056 case 4: 7057 if (useskip) 7058 strcpy (buf, "{vextrs,|extrw,s,}"); 7059 else 7060 strcpy (buf, "{bvb,|bb,}"); 7061 if (useskip && GET_MODE (operands[0]) == DImode) 7062 strcpy (buf, "extrd,s,*"); 7063 else if (GET_MODE (operands[0]) == DImode) 7064 strcpy (buf, "bb,*"); 7065 if ((which == 0 && negated) 7066 || (which == 1 && ! negated)) 7067 strcat (buf, ">="); 7068 else 7069 strcat (buf, "<"); 7070 if (useskip) 7071 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}"); 7072 else if (nullify && negated) 7073 { 7074 if (branch_needs_nop_p (insn)) 7075 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}"); 7076 else 7077 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}"); 7078 } 7079 else if (nullify && ! negated) 7080 { 7081 if (branch_needs_nop_p (insn)) 7082 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}"); 7083 else 7084 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}"); 7085 } 7086 else if (! nullify && negated) 7087 strcat (buf, "{ %0,%3| %0,%%sar,%3}"); 7088 else if (! nullify && ! negated) 7089 strcat (buf, "{ %0,%2| %0,%%sar,%2}"); 7090 break; 7091 7092 /* All long conditionals. Note a short backward branch with an 7093 unfilled delay slot is treated just like a long backward branch 7094 with an unfilled delay slot. */ 7095 case 8: 7096 /* Handle weird backwards branch with a filled delay slot 7097 which is nullified. */ 7098 if (dbr_sequence_length () != 0 7099 && ! forward_branch_p (insn) 7100 && nullify) 7101 { 7102 strcpy (buf, "{bvb,|bb,}"); 7103 if (GET_MODE (operands[0]) == DImode) 7104 strcat (buf, "*"); 7105 if ((which == 0 && negated) 7106 || (which == 1 && ! negated)) 7107 strcat (buf, "<"); 7108 else 7109 strcat (buf, ">="); 7110 if (negated) 7111 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}"); 7112 else 7113 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}"); 7114 } 7115 /* Handle short backwards branch with an unfilled delay slot. 7116 Using a bb;nop rather than extrs;bl saves 1 cycle for both 7117 taken and untaken branches. */ 7118 else if (dbr_sequence_length () == 0 7119 && ! forward_branch_p (insn) 7120 && INSN_ADDRESSES_SET_P () 7121 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7122 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7123 { 7124 strcpy (buf, "{bvb,|bb,}"); 7125 if (GET_MODE (operands[0]) == DImode) 7126 strcat (buf, "*"); 7127 if ((which == 0 && negated) 7128 || (which == 1 && ! negated)) 7129 strcat (buf, ">="); 7130 else 7131 strcat (buf, "<"); 7132 if (negated) 7133 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}"); 7134 else 7135 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}"); 7136 } 7137 else 7138 { 7139 strcpy (buf, "{vextrs,|extrw,s,}"); 7140 if (GET_MODE (operands[0]) == DImode) 7141 strcpy (buf, "extrd,s,*"); 7142 if ((which == 0 && negated) 7143 || (which == 1 && ! negated)) 7144 strcat (buf, "<"); 7145 else 7146 strcat (buf, ">="); 7147 if (nullify && negated) 7148 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}"); 7149 else if (nullify && ! negated) 7150 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}"); 7151 else if (negated) 7152 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}"); 7153 else 7154 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}"); 7155 } 7156 break; 7157 7158 default: 7159 /* The reversed conditional branch must branch over one additional 7160 instruction if the delay slot is filled and needs to be extracted 7161 by pa_output_lbranch. 
If the delay slot is empty or this is a 7162 nullified forward branch, the instruction after the reversed 7163 condition branch must be nullified. */ 7164 if (dbr_sequence_length () == 0 7165 || (nullify && forward_branch_p (insn))) 7166 { 7167 nullify = 1; 7168 xdelay = 0; 7169 operands[4] = GEN_INT (length); 7170 } 7171 else 7172 { 7173 xdelay = 1; 7174 operands[4] = GEN_INT (length + 4); 7175 } 7176 7177 if (GET_MODE (operands[0]) == DImode) 7178 strcpy (buf, "bb,*"); 7179 else 7180 strcpy (buf, "{bvb,|bb,}"); 7181 if ((which == 0 && negated) 7182 || (which == 1 && !negated)) 7183 strcat (buf, "<"); 7184 else 7185 strcat (buf, ">="); 7186 if (nullify) 7187 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}"); 7188 else 7189 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}"); 7190 output_asm_insn (buf, operands); 7191 return pa_output_lbranch (negated ? operands[3] : operands[2], 7192 insn, xdelay); 7193 } 7194 return buf; 7195} 7196 7197/* Return the output template for emitting a dbra type insn. 7198 7199 Note it may perform some output operations on its own before 7200 returning the final output string. */ 7201const char * 7202pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative) 7203{ 7204 int length = get_attr_length (insn); 7205 7206 /* A conditional branch to the following instruction (e.g. the delay slot) is 7207 asking for a disaster. Be prepared! */ 7208 7209 if (branch_to_delay_slot_p (insn)) 7210 { 7211 if (which_alternative == 0) 7212 return "ldo %1(%0),%0"; 7213 else if (which_alternative == 1) 7214 { 7215 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands); 7216 output_asm_insn ("ldw -16(%%r30),%4", operands); 7217 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7218 return "{fldws|fldw} -16(%%r30),%0"; 7219 } 7220 else 7221 { 7222 output_asm_insn ("ldw %0,%4", operands); 7223 return "ldo %1(%4),%4\n\tstw %4,%0"; 7224 } 7225 } 7226 7227 if (which_alternative == 0) 7228 { 7229 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7230 int xdelay; 7231 7232 /* If this is a long branch with its delay slot unfilled, set `nullify' 7233 as it can nullify the delay slot and save a nop. */ 7234 if (length == 8 && dbr_sequence_length () == 0) 7235 nullify = 1; 7236 7237 /* If this is a short forward conditional branch which did not get 7238 its delay slot filled, the delay slot can still be nullified. */ 7239 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7240 nullify = forward_branch_p (insn); 7241 7242 switch (length) 7243 { 7244 case 4: 7245 if (nullify) 7246 { 7247 if (branch_needs_nop_p (insn)) 7248 return "addib,%C2,n %1,%0,%3%#"; 7249 else 7250 return "addib,%C2,n %1,%0,%3"; 7251 } 7252 else 7253 return "addib,%C2 %1,%0,%3"; 7254 7255 case 8: 7256 /* Handle weird backwards branch with a fulled delay slot 7257 which is nullified. */ 7258 if (dbr_sequence_length () != 0 7259 && ! forward_branch_p (insn) 7260 && nullify) 7261 return "addib,%N2,n %1,%0,.+12\n\tb %3"; 7262 /* Handle short backwards branch with an unfilled delay slot. 7263 Using a addb;nop rather than addi;bl saves 1 cycle for both 7264 taken and untaken branches. */ 7265 else if (dbr_sequence_length () == 0 7266 && ! forward_branch_p (insn) 7267 && INSN_ADDRESSES_SET_P () 7268 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7269 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7270 return "addib,%C2 %1,%0,%3%#"; 7271 7272 /* Handle normal cases. 
*/ 7273 if (nullify) 7274 return "addi,%N2 %1,%0,%0\n\tb,n %3"; 7275 else 7276 return "addi,%N2 %1,%0,%0\n\tb %3"; 7277 7278 default: 7279 /* The reversed conditional branch must branch over one additional 7280 instruction if the delay slot is filled and needs to be extracted 7281 by pa_output_lbranch. If the delay slot is empty or this is a 7282 nullified forward branch, the instruction after the reversed 7283 condition branch must be nullified. */ 7284 if (dbr_sequence_length () == 0 7285 || (nullify && forward_branch_p (insn))) 7286 { 7287 nullify = 1; 7288 xdelay = 0; 7289 operands[4] = GEN_INT (length); 7290 } 7291 else 7292 { 7293 xdelay = 1; 7294 operands[4] = GEN_INT (length + 4); 7295 } 7296 7297 if (nullify) 7298 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands); 7299 else 7300 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands); 7301 7302 return pa_output_lbranch (operands[3], insn, xdelay); 7303 } 7304 7305 } 7306 /* Deal with gross reload from FP register case. */ 7307 else if (which_alternative == 1) 7308 { 7309 /* Move loop counter from FP register to MEM then into a GR, 7310 increment the GR, store the GR into MEM, and finally reload 7311 the FP register from MEM from within the branch's delay slot. */ 7312 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4", 7313 operands); 7314 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands); 7315 if (length == 24) 7316 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7317 else if (length == 28) 7318 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7319 else 7320 { 7321 operands[5] = GEN_INT (length - 16); 7322 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands); 7323 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7324 return pa_output_lbranch (operands[3], insn, 0); 7325 } 7326 } 7327 /* Deal with gross reload from memory case. */ 7328 else 7329 { 7330 /* Reload loop counter from memory, the store back to memory 7331 happens in the branch's delay slot. */ 7332 output_asm_insn ("ldw %0,%4", operands); 7333 if (length == 12) 7334 return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; 7335 else if (length == 16) 7336 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0"; 7337 else 7338 { 7339 operands[5] = GEN_INT (length - 4); 7340 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands); 7341 return pa_output_lbranch (operands[3], insn, 0); 7342 } 7343 } 7344} 7345 7346/* Return the output template for emitting a movb type insn. 7347 7348 Note it may perform some output operations on its own before 7349 returning the final output string. */ 7350const char * 7351pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative, 7352 int reverse_comparison) 7353{ 7354 int length = get_attr_length (insn); 7355 7356 /* A conditional branch to the following instruction (e.g. the delay slot) is 7357 asking for a disaster. Be prepared! */ 7358 7359 if (branch_to_delay_slot_p (insn)) 7360 { 7361 if (which_alternative == 0) 7362 return "copy %1,%0"; 7363 else if (which_alternative == 1) 7364 { 7365 output_asm_insn ("stw %1,-16(%%r30)", operands); 7366 return "{fldws|fldw} -16(%%r30),%0"; 7367 } 7368 else if (which_alternative == 2) 7369 return "stw %1,%0"; 7370 else 7371 return "mtsar %r1"; 7372 } 7373 7374 /* Support the second variant. 
*/ 7375 if (reverse_comparison) 7376 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); 7377 7378 if (which_alternative == 0) 7379 { 7380 int nullify = INSN_ANNULLED_BRANCH_P (insn); 7381 int xdelay; 7382 7383 /* If this is a long branch with its delay slot unfilled, set `nullify' 7384 as it can nullify the delay slot and save a nop. */ 7385 if (length == 8 && dbr_sequence_length () == 0) 7386 nullify = 1; 7387 7388 /* If this is a short forward conditional branch which did not get 7389 its delay slot filled, the delay slot can still be nullified. */ 7390 if (! nullify && length == 4 && dbr_sequence_length () == 0) 7391 nullify = forward_branch_p (insn); 7392 7393 switch (length) 7394 { 7395 case 4: 7396 if (nullify) 7397 { 7398 if (branch_needs_nop_p (insn)) 7399 return "movb,%C2,n %1,%0,%3%#"; 7400 else 7401 return "movb,%C2,n %1,%0,%3"; 7402 } 7403 else 7404 return "movb,%C2 %1,%0,%3"; 7405 7406 case 8: 7407 /* Handle weird backwards branch with a filled delay slot 7408 which is nullified. */ 7409 if (dbr_sequence_length () != 0 7410 && ! forward_branch_p (insn) 7411 && nullify) 7412 return "movb,%N2,n %1,%0,.+12\n\tb %3"; 7413 7414 /* Handle short backwards branch with an unfilled delay slot. 7415 Using a movb;nop rather than or;bl saves 1 cycle for both 7416 taken and untaken branches. */ 7417 else if (dbr_sequence_length () == 0 7418 && ! forward_branch_p (insn) 7419 && INSN_ADDRESSES_SET_P () 7420 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn))) 7421 - INSN_ADDRESSES (INSN_UID (insn)) - 8)) 7422 return "movb,%C2 %1,%0,%3%#"; 7423 /* Handle normal cases. */ 7424 if (nullify) 7425 return "or,%N2 %1,%%r0,%0\n\tb,n %3"; 7426 else 7427 return "or,%N2 %1,%%r0,%0\n\tb %3"; 7428 7429 default: 7430 /* The reversed conditional branch must branch over one additional 7431 instruction if the delay slot is filled and needs to be extracted 7432 by pa_output_lbranch. If the delay slot is empty or this is a 7433 nullified forward branch, the instruction after the reversed 7434 condition branch must be nullified. */ 7435 if (dbr_sequence_length () == 0 7436 || (nullify && forward_branch_p (insn))) 7437 { 7438 nullify = 1; 7439 xdelay = 0; 7440 operands[4] = GEN_INT (length); 7441 } 7442 else 7443 { 7444 xdelay = 1; 7445 operands[4] = GEN_INT (length + 4); 7446 } 7447 7448 if (nullify) 7449 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands); 7450 else 7451 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands); 7452 7453 return pa_output_lbranch (operands[3], insn, xdelay); 7454 } 7455 } 7456 /* Deal with gross reload for FP destination register case. */ 7457 else if (which_alternative == 1) 7458 { 7459 /* Move source register to MEM, perform the branch test, then 7460 finally load the FP register from MEM from within the branch's 7461 delay slot. */ 7462 output_asm_insn ("stw %1,-16(%%r30)", operands); 7463 if (length == 12) 7464 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0"; 7465 else if (length == 16) 7466 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0"; 7467 else 7468 { 7469 operands[4] = GEN_INT (length - 4); 7470 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands); 7471 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands); 7472 return pa_output_lbranch (operands[3], insn, 0); 7473 } 7474 } 7475 /* Deal with gross reload from memory case. */ 7476 else if (which_alternative == 2) 7477 { 7478 /* Reload loop counter from memory, the store back to memory 7479 happens in the branch's delay slot. 
*/ 7480 if (length == 8) 7481 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0"; 7482 else if (length == 12) 7483 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0"; 7484 else 7485 { 7486 operands[4] = GEN_INT (length); 7487 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0", 7488 operands); 7489 return pa_output_lbranch (operands[3], insn, 0); 7490 } 7491 } 7492 /* Handle SAR as a destination. */ 7493 else 7494 { 7495 if (length == 8) 7496 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1"; 7497 else if (length == 12) 7498 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1"; 7499 else 7500 { 7501 operands[4] = GEN_INT (length); 7502 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1", 7503 operands); 7504 return pa_output_lbranch (operands[3], insn, 0); 7505 } 7506 } 7507} 7508 7509/* Copy any FP arguments in INSN into integer registers. */ 7510static void 7511copy_fp_args (rtx_insn *insn) 7512{ 7513 rtx link; 7514 rtx xoperands[2]; 7515 7516 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7517 { 7518 int arg_mode, regno; 7519 rtx use = XEXP (link, 0); 7520 7521 if (! (GET_CODE (use) == USE 7522 && GET_CODE (XEXP (use, 0)) == REG 7523 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7524 continue; 7525 7526 arg_mode = GET_MODE (XEXP (use, 0)); 7527 regno = REGNO (XEXP (use, 0)); 7528 7529 /* Is it a floating point register? */ 7530 if (regno >= 32 && regno <= 39) 7531 { 7532 /* Copy the FP register into an integer register via memory. */ 7533 if (arg_mode == SFmode) 7534 { 7535 xoperands[0] = XEXP (use, 0); 7536 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); 7537 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands); 7538 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7539 } 7540 else 7541 { 7542 xoperands[0] = XEXP (use, 0); 7543 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); 7544 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands); 7545 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); 7546 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); 7547 } 7548 } 7549 } 7550} 7551 7552/* Compute length of the FP argument copy sequence for INSN. */ 7553static int 7554length_fp_args (rtx_insn *insn) 7555{ 7556 int length = 0; 7557 rtx link; 7558 7559 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) 7560 { 7561 int arg_mode, regno; 7562 rtx use = XEXP (link, 0); 7563 7564 if (! (GET_CODE (use) == USE 7565 && GET_CODE (XEXP (use, 0)) == REG 7566 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) 7567 continue; 7568 7569 arg_mode = GET_MODE (XEXP (use, 0)); 7570 regno = REGNO (XEXP (use, 0)); 7571 7572 /* Is it a floating point register? */ 7573 if (regno >= 32 && regno <= 39) 7574 { 7575 if (arg_mode == SFmode) 7576 length += 8; 7577 else 7578 length += 12; 7579 } 7580 } 7581 7582 return length; 7583} 7584 7585/* Return the attribute length for the millicode call instruction INSN. 7586 The length must match the code generated by pa_output_millicode_call. 7587 We include the delay slot in the returned length as it is better to 7588 over estimate the length than to under estimate it. */ 7589 7590int 7591pa_attr_length_millicode_call (rtx_insn *insn) 7592{ 7593 unsigned long distance = -1; 7594 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 
0 : total_code_bytes; 7595 7596 if (INSN_ADDRESSES_SET_P ()) 7597 { 7598 distance = (total + insn_current_reference_address (insn)); 7599 if (distance < total) 7600 distance = -1; 7601 } 7602 7603 if (TARGET_64BIT) 7604 { 7605 if (!TARGET_LONG_CALLS && distance < 7600000) 7606 return 8; 7607 7608 return 20; 7609 } 7610 else if (TARGET_PORTABLE_RUNTIME) 7611 return 24; 7612 else 7613 { 7614 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET) 7615 return 8; 7616 7617 if (!flag_pic) 7618 return 12; 7619 7620 return 24; 7621 } 7622} 7623 7624/* INSN is a function call. 7625 7626 CALL_DEST is the routine we are calling. */ 7627 7628const char * 7629pa_output_millicode_call (rtx_insn *insn, rtx call_dest) 7630{ 7631 int attr_length = get_attr_length (insn); 7632 int seq_length = dbr_sequence_length (); 7633 rtx xoperands[3]; 7634 7635 xoperands[0] = call_dest; 7636 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31); 7637 7638 /* Handle the common case where we are sure that the branch will 7639 reach the beginning of the $CODE$ subspace. The within reach 7640 form of the $$sh_func_adrs call has a length of 28. Because it 7641 has an attribute type of sh_func_adrs, it never has a nonzero 7642 sequence length (i.e., the delay slot is never filled). */ 7643 if (!TARGET_LONG_CALLS 7644 && (attr_length == 8 7645 || (attr_length == 28 7646 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS))) 7647 { 7648 output_asm_insn ("{bl|b,l} %0,%2", xoperands); 7649 } 7650 else 7651 { 7652 if (TARGET_64BIT) 7653 { 7654 /* It might seem that one insn could be saved by accessing 7655 the millicode function using the linkage table. However, 7656 this doesn't work in shared libraries and other dynamically 7657 loaded objects. Using a pc-relative sequence also avoids 7658 problems related to the implicit use of the gp register. */ 7659 output_asm_insn ("b,l .+8,%%r1", xoperands); 7660 7661 if (TARGET_GAS) 7662 { 7663 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 7664 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 7665 } 7666 else 7667 { 7668 xoperands[1] = gen_label_rtx (); 7669 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7670 targetm.asm_out.internal_label (asm_out_file, "L", 7671 CODE_LABEL_NUMBER (xoperands[1])); 7672 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7673 } 7674 7675 output_asm_insn ("bve,l (%%r1),%%r2", xoperands); 7676 } 7677 else if (TARGET_PORTABLE_RUNTIME) 7678 { 7679 /* Pure portable runtime doesn't allow be/ble; we also don't 7680 have PIC support in the assembler/linker, so this sequence 7681 is needed. */ 7682 7683 /* Get the address of our target into %r1. */ 7684 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7685 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands); 7686 7687 /* Get our return address into %r31. */ 7688 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands); 7689 output_asm_insn ("addi 8,%%r31,%%r31", xoperands); 7690 7691 /* Jump to our target address in %r1. 
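	     As a worked length example: a portable-runtime call to the
	     $$mulI millicode routine (name illustrative, and assuming
	     the SOM "bl" spelling of the branch) produces

		 ldil L'$$mulI,%r1
		 ldo R'$$mulI(%r1),%r1
		 bl .+8,%r31
		 addi 8,%r31,%r31
		 bv %r0(%r1)
		 nop

	     which is six words, matching the 24 bytes returned by
	     pa_attr_length_millicode_call for TARGET_PORTABLE_RUNTIME.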
*/ 7692 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7693 } 7694 else if (!flag_pic) 7695 { 7696 output_asm_insn ("ldil L'%0,%%r1", xoperands); 7697 if (TARGET_PA_20) 7698 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands); 7699 else 7700 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands); 7701 } 7702 else 7703 { 7704 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 7705 output_asm_insn ("addi 16,%%r1,%%r31", xoperands); 7706 7707 if (TARGET_SOM || !TARGET_GAS) 7708 { 7709 /* The HP assembler can generate relocations for the 7710 difference of two symbols. GAS can do this for a 7711 millicode symbol but not an arbitrary external 7712 symbol when generating SOM output. */ 7713 xoperands[1] = gen_label_rtx (); 7714 targetm.asm_out.internal_label (asm_out_file, "L", 7715 CODE_LABEL_NUMBER (xoperands[1])); 7716 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands); 7717 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands); 7718 } 7719 else 7720 { 7721 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands); 7722 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1", 7723 xoperands); 7724 } 7725 7726 /* Jump to our target address in %r1. */ 7727 output_asm_insn ("bv %%r0(%%r1)", xoperands); 7728 } 7729 } 7730 7731 if (seq_length == 0) 7732 output_asm_insn ("nop", xoperands); 7733 7734 return ""; 7735} 7736 7737/* Return the attribute length of the call instruction INSN. The SIBCALL 7738 flag indicates whether INSN is a regular call or a sibling call. The 7739 length returned must be longer than the code actually generated by 7740 pa_output_call. Since branch shortening is done before delay branch 7741 sequencing, there is no way to determine whether or not the delay 7742 slot will be filled during branch shortening. Even when the delay 7743 slot is filled, we may have to add a nop if the delay slot contains 7744 a branch that can't reach its target. Thus, we always have to include 7745 the delay slot in the length estimate. This used to be done in 7746 pa_adjust_insn_length but we do it here now as some sequences always 7747 fill the delay slot and we can save four bytes in the estimate for 7748 these sequences. */ 7749 7750int 7751pa_attr_length_call (rtx_insn *insn, int sibcall) 7752{ 7753 int local_call; 7754 rtx call, call_dest; 7755 tree call_decl; 7756 int length = 0; 7757 rtx pat = PATTERN (insn); 7758 unsigned long distance = -1; 7759 7760 gcc_assert (CALL_P (insn)); 7761 7762 if (INSN_ADDRESSES_SET_P ()) 7763 { 7764 unsigned long total; 7765 7766 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes; 7767 distance = (total + insn_current_reference_address (insn)); 7768 if (distance < total) 7769 distance = -1; 7770 } 7771 7772 gcc_assert (GET_CODE (pat) == PARALLEL); 7773 7774 /* Get the call rtx. */ 7775 call = XVECEXP (pat, 0, 0); 7776 if (GET_CODE (call) == SET) 7777 call = SET_SRC (call); 7778 7779 gcc_assert (GET_CODE (call) == CALL); 7780 7781 /* Determine if this is a local call. */ 7782 call_dest = XEXP (XEXP (call, 0), 0); 7783 call_decl = SYMBOL_REF_DECL (call_dest); 7784 local_call = call_decl && targetm.binds_local_p (call_decl); 7785 7786 /* pc-relative branch. */ 7787 if (!TARGET_LONG_CALLS 7788 && ((TARGET_PA_20 && !sibcall && distance < 7600000) 7789 || distance < MAX_PCREL17F_OFFSET)) 7790 length += 8; 7791 7792 /* 64-bit plabel sequence. */ 7793 else if (TARGET_64BIT && !local_call) 7794 length += sibcall ? 28 : 24; 7795 7796 /* non-pic long absolute branch sequence. 
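     This is the ldil/branch pair emitted by pa_output_call, e.g.
     "ldil L'foo,%r1" then "ble R'foo(%sr4,%r1)" (symbol illustrative)
     plus the delay slot: three words, hence the 12 bytes added below.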
*/ 7797 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) 7798 length += 12; 7799 7800 /* long pc-relative branch sequence. */ 7801 else if (TARGET_LONG_PIC_SDIFF_CALL 7802 || (TARGET_GAS && !TARGET_SOM 7803 && (TARGET_LONG_PIC_PCREL_CALL || local_call))) 7804 { 7805 length += 20; 7806 7807 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7808 length += 8; 7809 } 7810 7811 /* 32-bit plabel sequence. */ 7812 else 7813 { 7814 length += 32; 7815 7816 if (TARGET_SOM) 7817 length += length_fp_args (insn); 7818 7819 if (flag_pic) 7820 length += 4; 7821 7822 if (!TARGET_PA_20) 7823 { 7824 if (!sibcall) 7825 length += 8; 7826 7827 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic)) 7828 length += 8; 7829 } 7830 } 7831 7832 return length; 7833} 7834 7835/* INSN is a function call. 7836 7837 CALL_DEST is the routine we are calling. */ 7838 7839const char * 7840pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) 7841{ 7842 int seq_length = dbr_sequence_length (); 7843 tree call_decl = SYMBOL_REF_DECL (call_dest); 7844 int local_call = call_decl && targetm.binds_local_p (call_decl); 7845 rtx xoperands[2]; 7846 7847 xoperands[0] = call_dest; 7848 7849 /* Handle the common case where we're sure that the branch will reach 7850 the beginning of the "$CODE$" subspace. This is the beginning of 7851 the current function if we are in a named section. */ 7852 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8) 7853 { 7854 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2); 7855 output_asm_insn ("{bl|b,l} %0,%1", xoperands); 7856 } 7857 else 7858 { 7859 if (TARGET_64BIT && !local_call) 7860 { 7861 /* ??? As far as I can tell, the HP linker doesn't support the 7862 long pc-relative sequence described in the 64-bit runtime 7863 architecture. So, we use a slightly longer indirect call. */ 7864 xoperands[0] = pa_get_deferred_plabel (call_dest); 7865 xoperands[1] = gen_label_rtx (); 7866 7867 /* If this isn't a sibcall, we put the load of %r27 into the 7868 delay slot. We can't do this in a sibcall as we don't 7869 have a second call-clobbered scratch register available. 7870 We don't need to do anything when generating fast indirect 7871 calls. */ 7872 if (seq_length != 0 && !sibcall) 7873 { 7874 final_scan_insn (NEXT_INSN (insn), asm_out_file, 7875 optimize, 0, NULL); 7876 7877 /* Now delete the delay insn. */ 7878 SET_INSN_DELETED (NEXT_INSN (insn)); 7879 seq_length = 0; 7880 } 7881 7882 output_asm_insn ("addil LT'%0,%%r27", xoperands); 7883 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); 7884 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); 7885 7886 if (sibcall) 7887 { 7888 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7889 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); 7890 output_asm_insn ("bve (%%r1)", xoperands); 7891 } 7892 else 7893 { 7894 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); 7895 output_asm_insn ("bve,l (%%r2),%%r2", xoperands); 7896 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); 7897 seq_length = 1; 7898 } 7899 } 7900 else 7901 { 7902 int indirect_call = 0; 7903 7904 /* Emit a long call. There are several different sequences 7905 of increasing length and complexity. In most cases, 7906 they don't allow an instruction in the delay slot. 
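	     As a worked example of the matching estimate in
	     pa_attr_length_call above: a 32-bit pic plabel sibcall to a
	     non-local function on PA 1.x, non-SOM, with space registers
	     enabled is costed at 32 + 4 (pic) + 8 (space regs) = 44 bytes.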
*/
          if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
              && !TARGET_LONG_PIC_SDIFF_CALL
              && !(TARGET_GAS && !TARGET_SOM
                   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
              && !TARGET_64BIT)
            indirect_call = 1;

          if (seq_length != 0
              && !sibcall
              && (!TARGET_PA_20
                  || indirect_call
                  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
            {
              /* A non-jump insn in the delay slot.  By definition we can
                 emit this insn before the call (and in fact before the
                 argument relocation).  */
              final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
                               NULL);

              /* Now delete the delay insn.  */
              SET_INSN_DELETED (NEXT_INSN (insn));
              seq_length = 0;
            }

          if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
            {
              /* This is the best sequence for making long calls in
                 non-pic code.  Unfortunately, GNU ld doesn't provide
                 the stub needed for external calls, and GAS's support
                 for this with the SOM linker is buggy.  It is safe
                 to use this for local calls.  */
              output_asm_insn ("ldil L'%0,%%r1", xoperands);
              if (sibcall)
                output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
              else
                {
                  if (TARGET_PA_20)
                    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
                                     xoperands);
                  else
                    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

                  output_asm_insn ("copy %%r31,%%r2", xoperands);
                  seq_length = 1;
                }
            }
          else
            {
              if (TARGET_LONG_PIC_SDIFF_CALL)
                {
                  /* The HP assembler and linker can handle relocations
                     for the difference of two symbols.  The HP assembler
                     recognizes the sequence as a pc-relative call and
                     the linker provides stubs when needed.  */
                  xoperands[1] = gen_label_rtx ();
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
                  targetm.asm_out.internal_label (asm_out_file, "L",
                                                  CODE_LABEL_NUMBER (xoperands[1]));
                  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
                }
              else if (TARGET_GAS && !TARGET_SOM
                       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
                {
                  /* GAS currently can't generate the relocations that
                     are needed for the SOM linker under HP-UX using this
                     sequence.  The GNU linker doesn't generate the stubs
                     that are needed for external calls on TARGET_ELF32
                     with this sequence.  For now, we have to use a
                     longer plabel sequence when using GAS.  */
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
                                   xoperands);
                  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
                                   xoperands);
                }
              else
                {
                  /* Emit a long plabel-based call sequence.  This is
                     essentially an inline implementation of $$dyncall.
                     We don't actually try to call $$dyncall as this is
                     as difficult as calling the function itself.  */
                  xoperands[0] = pa_get_deferred_plabel (call_dest);
                  xoperands[1] = gen_label_rtx ();

                  /* Since the call is indirect, FP arguments in registers
                     need to be copied to the general registers.  Then, the
                     argument relocation stub will copy them back.
*/
                  if (TARGET_SOM)
                    copy_fp_args (insn);

                  if (flag_pic)
                    {
                      output_asm_insn ("addil LT'%0,%%r19", xoperands);
                      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
                      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
                    }
                  else
                    {
                      output_asm_insn ("addil LR'%0-$global$,%%r27",
                                       xoperands);
                      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
                                       xoperands);
                    }

                  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
                  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
                  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
                  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

                  if (!sibcall && !TARGET_PA_20)
                    {
                      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
                      else
                        output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
                    }
                }

              if (TARGET_PA_20)
                {
                  if (sibcall)
                    output_asm_insn ("bve (%%r1)", xoperands);
                  else
                    {
                      if (indirect_call)
                        {
                          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                          output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
                          seq_length = 1;
                        }
                      else
                        output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                    }
                }
              else
                {
                  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
                    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
                                     xoperands);

                  if (sibcall)
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
                    }
                  else
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

                      if (indirect_call)
                        output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
                      else
                        output_asm_insn ("copy %%r31,%%r2", xoperands);
                      seq_length = 1;
                    }
                }
            }
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
pa_attr_length_indirect_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS
      || (!TARGET_LONG_CALLS
          && !TARGET_PORTABLE_RUNTIME
          && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
              || distance < MAX_PCREL17F_OFFSET)))
    return 8;

  if (flag_pic)
    return 20;

  if (TARGET_PORTABLE_RUNTIME)
    return 16;

  /* Out of reach, can use ble.
*/ 8116 return 12; 8117} 8118 8119const char * 8120pa_output_indirect_call (rtx_insn *insn, rtx call_dest) 8121{ 8122 rtx xoperands[1]; 8123 8124 if (TARGET_64BIT) 8125 { 8126 xoperands[0] = call_dest; 8127 output_asm_insn ("ldd 16(%0),%%r2", xoperands); 8128 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands); 8129 return ""; 8130 } 8131 8132 /* First the special case for kernels, level 0 systems, etc. */ 8133 if (TARGET_FAST_INDIRECT_CALLS) 8134 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; 8135 8136 /* Now the normal case -- we can reach $$dyncall directly or 8137 we're sure that we can get there via a long-branch stub. 8138 8139 No need to check target flags as the length uniquely identifies 8140 the remaining cases. */ 8141 if (pa_attr_length_indirect_call (insn) == 8) 8142 { 8143 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to 8144 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit 8145 variant of the B,L instruction can't be used on the SOM target. */ 8146 if (TARGET_PA_20 && !TARGET_SOM) 8147 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31"; 8148 else 8149 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2"; 8150 } 8151 8152 /* Long millicode call, but we are not generating PIC or portable runtime 8153 code. */ 8154 if (pa_attr_length_indirect_call (insn) == 12) 8155 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; 8156 8157 /* Long millicode call for portable runtime. */ 8158 if (pa_attr_length_indirect_call (insn) == 16) 8159 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)"; 8160 8161 /* We need a long PIC call to $$dyncall. */ 8162 xoperands[0] = NULL_RTX; 8163 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands); 8164 if (TARGET_SOM || !TARGET_GAS) 8165 { 8166 xoperands[0] = gen_label_rtx (); 8167 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands); 8168 targetm.asm_out.internal_label (asm_out_file, "L", 8169 CODE_LABEL_NUMBER (xoperands[0])); 8170 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands); 8171 } 8172 else 8173 { 8174 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands); 8175 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1", 8176 xoperands); 8177 } 8178 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8179 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands); 8180 return ""; 8181} 8182 8183/* In HPUX 8.0's shared library scheme, special relocations are needed 8184 for function labels if they might be passed to a function 8185 in a shared library (because shared libraries don't live in code 8186 space), and special magic is needed to construct their address. 
*/

void
pa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
        pa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}

/* This is sort of the inverse of pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */
int
pa_is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
          && function_label_operand (XEXP (op, 0), VOIDmode)
          && GET_CODE (XEXP (op, 1)) == CONST_INT);
}

/* Output assembly code for a thunk to FUNCTION.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
                        HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                        tree function)
{
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[16];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  final_start_function (emit_barrier (), file, 1);

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
          && ((targetm_common.have_named_sections
               && DECL_SECTION_NAME (thunk_fndecl) != NULL
               /* The GNU 64-bit linker has rather poor stub management.
                  So, we use a long branch from thunks that aren't in
                  the same section as the target function.
*/ 8289 && ((!TARGET_64BIT 8290 && (DECL_SECTION_NAME (thunk_fndecl) 8291 != DECL_SECTION_NAME (function))) 8292 || ((DECL_SECTION_NAME (thunk_fndecl) 8293 == DECL_SECTION_NAME (function)) 8294 && last_address < 262132))) 8295 /* In this case, we need to be able to reach the start of 8296 the stub table even though the function is likely closer 8297 and can be jumped to directly. */ 8298 || (targetm_common.have_named_sections 8299 && DECL_SECTION_NAME (thunk_fndecl) == NULL 8300 && DECL_SECTION_NAME (function) == NULL 8301 && total_code_bytes < MAX_PCREL17F_OFFSET) 8302 /* Likewise. */ 8303 || (!targetm_common.have_named_sections 8304 && total_code_bytes < MAX_PCREL17F_OFFSET)))) 8305 { 8306 if (!val_14) 8307 output_asm_insn ("addil L'%2,%%r26", xoperands); 8308 8309 output_asm_insn ("b %0", xoperands); 8310 8311 if (val_14) 8312 { 8313 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8314 nbytes += 8; 8315 } 8316 else 8317 { 8318 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8319 nbytes += 12; 8320 } 8321 } 8322 else if (TARGET_64BIT) 8323 { 8324 /* We only have one call-clobbered scratch register, so we can't 8325 make use of the delay slot if delta doesn't fit in 14 bits. */ 8326 if (!val_14) 8327 { 8328 output_asm_insn ("addil L'%2,%%r26", xoperands); 8329 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8330 } 8331 8332 output_asm_insn ("b,l .+8,%%r1", xoperands); 8333 8334 if (TARGET_GAS) 8335 { 8336 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8337 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands); 8338 } 8339 else 8340 { 8341 xoperands[3] = GEN_INT (val_14 ? 8 : 16); 8342 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands); 8343 } 8344 8345 if (val_14) 8346 { 8347 output_asm_insn ("bv %%r0(%%r1)", xoperands); 8348 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8349 nbytes += 20; 8350 } 8351 else 8352 { 8353 output_asm_insn ("bv,n %%r0(%%r1)", xoperands); 8354 nbytes += 24; 8355 } 8356 } 8357 else if (TARGET_PORTABLE_RUNTIME) 8358 { 8359 output_asm_insn ("ldil L'%0,%%r1", xoperands); 8360 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands); 8361 8362 if (!val_14) 8363 output_asm_insn ("addil L'%2,%%r26", xoperands); 8364 8365 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8366 8367 if (val_14) 8368 { 8369 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8370 nbytes += 16; 8371 } 8372 else 8373 { 8374 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8375 nbytes += 20; 8376 } 8377 } 8378 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8379 { 8380 /* The function is accessible from outside this module. The only 8381 way to avoid an import stub between the thunk and function is to 8382 call the function directly with an indirect sequence similar to 8383 that used by $$dyncall. This is possible because $$dyncall acts 8384 as the import stub in an indirect call. 
*/ 8385 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number); 8386 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label); 8387 output_asm_insn ("addil LT'%3,%%r19", xoperands); 8388 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands); 8389 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8390 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); 8391 output_asm_insn ("depi 0,31,2,%%r22", xoperands); 8392 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands); 8393 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands); 8394 8395 if (!val_14) 8396 { 8397 output_asm_insn ("addil L'%2,%%r26", xoperands); 8398 nbytes += 4; 8399 } 8400 8401 if (TARGET_PA_20) 8402 { 8403 output_asm_insn ("bve (%%r22)", xoperands); 8404 nbytes += 36; 8405 } 8406 else if (TARGET_NO_SPACE_REGS) 8407 { 8408 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands); 8409 nbytes += 36; 8410 } 8411 else 8412 { 8413 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands); 8414 output_asm_insn ("mtsp %%r21,%%sr0", xoperands); 8415 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands); 8416 nbytes += 44; 8417 } 8418 8419 if (val_14) 8420 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8421 else 8422 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8423 } 8424 else if (flag_pic) 8425 { 8426 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands); 8427 8428 if (TARGET_SOM || !TARGET_GAS) 8429 { 8430 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands); 8431 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands); 8432 } 8433 else 8434 { 8435 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands); 8436 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands); 8437 } 8438 8439 if (!val_14) 8440 output_asm_insn ("addil L'%2,%%r26", xoperands); 8441 8442 output_asm_insn ("bv %%r0(%%r22)", xoperands); 8443 8444 if (val_14) 8445 { 8446 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8447 nbytes += 20; 8448 } 8449 else 8450 { 8451 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8452 nbytes += 24; 8453 } 8454 } 8455 else 8456 { 8457 if (!val_14) 8458 output_asm_insn ("addil L'%2,%%r26", xoperands); 8459 8460 output_asm_insn ("ldil L'%0,%%r22", xoperands); 8461 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands); 8462 8463 if (val_14) 8464 { 8465 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands); 8466 nbytes += 12; 8467 } 8468 else 8469 { 8470 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands); 8471 nbytes += 16; 8472 } 8473 } 8474 8475 final_end_function (); 8476 8477 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function)) 8478 { 8479 switch_to_section (data_section); 8480 output_asm_insn (".align 4", xoperands); 8481 ASM_OUTPUT_LABEL (file, label); 8482 output_asm_insn (".word P'%0", xoperands); 8483 } 8484 8485 current_thunk_number++; 8486 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) 8487 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); 8488 last_address += nbytes; 8489 if (old_last_address > last_address) 8490 last_address = UINT_MAX; 8491 update_total_code_bytes (nbytes); 8492} 8493 8494/* Only direct calls to static functions are allowed to be sibling (tail) 8495 call optimized. 8496 8497 This restriction is necessary because some linker generated stubs will 8498 store return pointers into rp' in some cases which might clobber a 8499 live value already in rp'. 8500 8501 In a sibcall the current function and the target function share stack 8502 space. 
Thus, if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */
static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return (decl && !TREE_PUBLIC (decl));
}

/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */
static bool
pa_commutative_p (const_rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
          && (TARGET_NO_SPACE_REGS
              || (outer_code != UNKNOWN && outer_code != MEM)
              || GET_CODE (x) != PLUS));
}

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
int
pa_fmpyaddoperands (rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
      || rtx_equal_p (operands[3], operands[1])
      || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* The multiply cannot feed into the addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32-bit FP regs.
*/ 8593 if (mode == SFmode 8594 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8595 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8596 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8597 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8598 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8599 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8600 return 0; 8601 8602 /* Passed. Operands are suitable for fmpyadd. */ 8603 return 1; 8604} 8605 8606#if !defined(USE_COLLECT2) 8607static void 8608pa_asm_out_constructor (rtx symbol, int priority) 8609{ 8610 if (!function_label_operand (symbol, VOIDmode)) 8611 pa_encode_label (symbol); 8612 8613#ifdef CTORS_SECTION_ASM_OP 8614 default_ctor_section_asm_out_constructor (symbol, priority); 8615#else 8616# ifdef TARGET_ASM_NAMED_SECTION 8617 default_named_section_asm_out_constructor (symbol, priority); 8618# else 8619 default_stabs_asm_out_constructor (symbol, priority); 8620# endif 8621#endif 8622} 8623 8624static void 8625pa_asm_out_destructor (rtx symbol, int priority) 8626{ 8627 if (!function_label_operand (symbol, VOIDmode)) 8628 pa_encode_label (symbol); 8629 8630#ifdef DTORS_SECTION_ASM_OP 8631 default_dtor_section_asm_out_destructor (symbol, priority); 8632#else 8633# ifdef TARGET_ASM_NAMED_SECTION 8634 default_named_section_asm_out_destructor (symbol, priority); 8635# else 8636 default_stabs_asm_out_destructor (symbol, priority); 8637# endif 8638#endif 8639} 8640#endif 8641 8642/* This function places uninitialized global data in the bss section. 8643 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this 8644 function on the SOM port to prevent uninitialized global data from 8645 being placed in the data section. */ 8646 8647void 8648pa_asm_output_aligned_bss (FILE *stream, 8649 const char *name, 8650 unsigned HOST_WIDE_INT size, 8651 unsigned int align) 8652{ 8653 switch_to_section (bss_section); 8654 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8655 8656#ifdef ASM_OUTPUT_TYPE_DIRECTIVE 8657 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); 8658#endif 8659 8660#ifdef ASM_OUTPUT_SIZE_DIRECTIVE 8661 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); 8662#endif 8663 8664 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8665 ASM_OUTPUT_LABEL (stream, name); 8666 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8667} 8668 8669/* Both the HP and GNU assemblers under HP-UX provide a .comm directive 8670 that doesn't allow the alignment of global common storage to be directly 8671 specified. The SOM linker aligns common storage based on the rounded 8672 value of the NUM_BYTES parameter in the .comm directive. It's not 8673 possible to use the .align directive as it doesn't affect the alignment 8674 of the label associated with a .comm directive. */ 8675 8676void 8677pa_asm_output_aligned_common (FILE *stream, 8678 const char *name, 8679 unsigned HOST_WIDE_INT size, 8680 unsigned int align) 8681{ 8682 unsigned int max_common_align; 8683 8684 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64); 8685 if (align > max_common_align) 8686 { 8687 warning (0, "alignment (%u) for %s exceeds maximum alignment " 8688 "for global common data. 
Using %u", 8689 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT); 8690 align = max_common_align; 8691 } 8692 8693 switch_to_section (bss_section); 8694 8695 assemble_name (stream, name); 8696 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n", 8697 MAX (size, align / BITS_PER_UNIT)); 8698} 8699 8700/* We can't use .comm for local common storage as the SOM linker effectively 8701 treats the symbol as universal and uses the same storage for local symbols 8702 with the same name in different object files. The .block directive 8703 reserves an uninitialized block of storage. However, it's not common 8704 storage. Fortunately, GCC never requests common storage with the same 8705 name in any given translation unit. */ 8706 8707void 8708pa_asm_output_aligned_local (FILE *stream, 8709 const char *name, 8710 unsigned HOST_WIDE_INT size, 8711 unsigned int align) 8712{ 8713 switch_to_section (bss_section); 8714 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT); 8715 8716#ifdef LOCAL_ASM_OP 8717 fprintf (stream, "%s", LOCAL_ASM_OP); 8718 assemble_name (stream, name); 8719 fprintf (stream, "\n"); 8720#endif 8721 8722 ASM_OUTPUT_LABEL (stream, name); 8723 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); 8724} 8725 8726/* Returns 1 if the 6 operands specified in OPERANDS are suitable for 8727 use in fmpysub instructions. */ 8728int 8729pa_fmpysuboperands (rtx *operands) 8730{ 8731 machine_mode mode = GET_MODE (operands[0]); 8732 8733 /* Must be a floating point mode. */ 8734 if (mode != SFmode && mode != DFmode) 8735 return 0; 8736 8737 /* All modes must be the same. */ 8738 if (! (mode == GET_MODE (operands[1]) 8739 && mode == GET_MODE (operands[2]) 8740 && mode == GET_MODE (operands[3]) 8741 && mode == GET_MODE (operands[4]) 8742 && mode == GET_MODE (operands[5]))) 8743 return 0; 8744 8745 /* All operands must be registers. */ 8746 if (! (GET_CODE (operands[1]) == REG 8747 && GET_CODE (operands[2]) == REG 8748 && GET_CODE (operands[3]) == REG 8749 && GET_CODE (operands[4]) == REG 8750 && GET_CODE (operands[5]) == REG)) 8751 return 0; 8752 8753 /* Only 2 real operands to the subtraction. Subtraction is not a commutative 8754 operation, so operands[4] must be the same as operand[3]. */ 8755 if (! rtx_equal_p (operands[3], operands[4])) 8756 return 0; 8757 8758 /* multiply cannot feed into subtraction. */ 8759 if (rtx_equal_p (operands[5], operands[0])) 8760 return 0; 8761 8762 /* Inout operand of sub cannot conflict with any operands from multiply. */ 8763 if (rtx_equal_p (operands[3], operands[0]) 8764 || rtx_equal_p (operands[3], operands[1]) 8765 || rtx_equal_p (operands[3], operands[2])) 8766 return 0; 8767 8768 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ 8769 if (mode == SFmode 8770 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS 8771 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS 8772 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS 8773 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS 8774 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS 8775 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS)) 8776 return 0; 8777 8778 /* Passed. Operands are suitable for fmpysub. */ 8779 return 1; 8780} 8781 8782/* Return 1 if the given constant is 2, 4, or 8. These are the valid 8783 constants for shadd instructions. 
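   The shadd family scales its first operand before the add; e.g.
   "sh2add %r26,%r25,%r28" computes 4 * %r26 + %r25 (registers
   illustrative), so the usable scale factors are exactly 2, 4 and 8.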
*/
int
pa_shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}

/* Return TRUE if INSN branches forward.  */

static bool
forward_branch_p (rtx_insn *insn)
{
  rtx lab = JUMP_LABEL (insn);

  /* The INSN must have a jump label.  */
  gcc_assert (lab != NULL_RTX);

  if (INSN_ADDRESSES_SET_P ())
    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));

  while (insn)
    {
      if (insn == lab)
        return true;
      else
        insn = NEXT_INSN (insn);
    }

  return false;
}

/* Output an unconditional move and branch insn.  */

const char *
pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        {
          /* Nothing in the delay slot, fake it by putting the combined
             insn (the copy or add) in the delay slot of a bl.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "b %2\n\tldi %1,%0";
          else
            return "b %2\n\tcopy %1,%0";
        }
      else
        {
          /* Something in the delay slot, but we've got a long branch.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "ldi %1,%0\n\tb %2";
          else
            return "copy %1,%0\n\tb %2";
        }
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return pa_output_lbranch (operands[2], insn, 1);
}

/* Output an unconditional add and branch insn.  */

const char *
pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        /* Nothing in the delay slot, fake it by putting the combined
           insn (the copy or add) in the delay slot of a bl.  */
        return "b %3\n\tadd%I1 %1,%0,%0";
      else
        /* Something in the delay slot, but we've got a long branch.  */
        return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return pa_output_lbranch (operands[3], insn, 1);
}

/* We use this hook to perform a PA-specific optimization which is difficult
   to do in earlier passes.  */

static void
pa_reorg (void)
{
  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
}

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.
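
   For example, on a PA1.1 machine an independent pair such as

       fmpy,sgl %fr4,%fr5,%fr6
       fadd,sgl %fr7,%fr8,%fr8

   can be issued as a single fmpyadd insn, since the add's registers do
   not overlap the multiply's.  (The registers, and the exact assembler
   operand order of fmpyadd, are illustrative only.)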

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

   * addb can add two registers or a register and a small integer
   and jump to a nearby (+-8k) location.  Normally the jump to the
   nearby location is conditional on the result of the add, but by
   using the "true" condition we can make the jump unconditional.
   Thus addb can perform two independent operations in one insn.

   * movb is similar to addb in that it can perform a reg->reg
   or small immediate->reg copy and jump to a nearby (+-8k) location.

   * fmpyadd and fmpysub can perform an FP multiply and either an
   FP add or FP sub if the operands of the multiply and add/sub are
   independent (there are other minor restrictions).  Note both
   the fmpy and fadd/fsub can in theory move to better spots according
   to data dependencies, but for now we require that the fmpy stay at a
   fixed location.

   * Many of the memory operations can perform pre & post updates
   of index registers.  GCC's pre/post increment/decrement addressing
   is far too simple to take advantage of all the possibilities.  This
   pass may not be suitable since those insns may not be independent.

   * comclr can compare two ints or an int and a register, nullify
   the following instruction and zero some other register.  This
   is more difficult to use as it's harder to find an insn which
   will generate a comclr than finding something like an unconditional
   branch.  (conditional moves & long branches create comclr insns).

   * Most arithmetic operations can conditionally skip the next
   instruction.  They can be viewed as "perform this operation
   and conditionally jump to this nearby location" (where nearby
   is an insn away).  These are difficult to use due to the
   branch length restrictions.  */

static void
pa_combine_instructions (void)
{
  rtx_insn *anchor;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  rtx_insn *new_rtx = make_insn_raw (par);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && !
forward_branch_p (anchor))) 8980 { 8981 rtx_insn *floater; 8982 8983 for (floater = PREV_INSN (anchor); 8984 floater; 8985 floater = PREV_INSN (floater)) 8986 { 8987 if (NOTE_P (floater) 8988 || (NONJUMP_INSN_P (floater) 8989 && (GET_CODE (PATTERN (floater)) == USE 8990 || GET_CODE (PATTERN (floater)) == CLOBBER))) 8991 continue; 8992 8993 /* Anything except a regular INSN will stop our search. */ 8994 if (! NONJUMP_INSN_P (floater)) 8995 { 8996 floater = NULL; 8997 break; 8998 } 8999 9000 /* See if FLOATER is suitable for combination with the 9001 anchor. */ 9002 floater_attr = get_attr_pa_combine_type (floater); 9003 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9004 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9005 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9006 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9007 { 9008 /* If ANCHOR and FLOATER can be combined, then we're 9009 done with this pass. */ 9010 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9011 SET_DEST (PATTERN (floater)), 9012 XEXP (SET_SRC (PATTERN (floater)), 0), 9013 XEXP (SET_SRC (PATTERN (floater)), 1))) 9014 break; 9015 } 9016 9017 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH 9018 && floater_attr == PA_COMBINE_TYPE_ADDMOVE) 9019 { 9020 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) 9021 { 9022 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9023 SET_DEST (PATTERN (floater)), 9024 XEXP (SET_SRC (PATTERN (floater)), 0), 9025 XEXP (SET_SRC (PATTERN (floater)), 1))) 9026 break; 9027 } 9028 else 9029 { 9030 if (pa_can_combine_p (new_rtx, anchor, floater, 0, 9031 SET_DEST (PATTERN (floater)), 9032 SET_SRC (PATTERN (floater)), 9033 SET_SRC (PATTERN (floater)))) 9034 break; 9035 } 9036 } 9037 } 9038 9039 /* If we didn't find anything on the backwards scan try forwards. */ 9040 if (!floater 9041 && (anchor_attr == PA_COMBINE_TYPE_FMPY 9042 || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) 9043 { 9044 for (floater = anchor; floater; floater = NEXT_INSN (floater)) 9045 { 9046 if (NOTE_P (floater) 9047 || (NONJUMP_INSN_P (floater) 9048 && (GET_CODE (PATTERN (floater)) == USE 9049 || GET_CODE (PATTERN (floater)) == CLOBBER))) 9050 9051 continue; 9052 9053 /* Anything except a regular INSN will stop our search. */ 9054 if (! NONJUMP_INSN_P (floater)) 9055 { 9056 floater = NULL; 9057 break; 9058 } 9059 9060 /* See if FLOATER is suitable for combination with the 9061 anchor. */ 9062 floater_attr = get_attr_pa_combine_type (floater); 9063 if ((anchor_attr == PA_COMBINE_TYPE_FMPY 9064 && floater_attr == PA_COMBINE_TYPE_FADDSUB) 9065 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9066 && floater_attr == PA_COMBINE_TYPE_FMPY)) 9067 { 9068 /* If ANCHOR and FLOATER can be combined, then we're 9069 done with this pass. */ 9070 if (pa_can_combine_p (new_rtx, anchor, floater, 1, 9071 SET_DEST (PATTERN (floater)), 9072 XEXP (SET_SRC (PATTERN (floater)), 9073 0), 9074 XEXP (SET_SRC (PATTERN (floater)), 9075 1))) 9076 break; 9077 } 9078 } 9079 } 9080 9081 /* FLOATER will be nonzero if we found a suitable floating 9082 insn for combination with ANCHOR. */ 9083 if (floater 9084 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB 9085 || anchor_attr == PA_COMBINE_TYPE_FMPY)) 9086 { 9087 /* Emit the new instruction and delete the old anchor. */ 9088 emit_insn_before (gen_rtx_PARALLEL 9089 (VOIDmode, 9090 gen_rtvec (2, PATTERN (anchor), 9091 PATTERN (floater))), 9092 anchor); 9093 9094 SET_INSN_DELETED (anchor); 9095 9096 /* Emit a special USE insn for FLOATER, then delete 9097 the floating insn. 
*/ 9098 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); 9099 delete_insn (floater); 9100 9101 continue; 9102 } 9103 else if (floater 9104 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) 9105 { 9106 rtx temp; 9107 /* Emit the new_jump instruction and delete the old anchor. */ 9108 temp 9109 = emit_jump_insn_before (gen_rtx_PARALLEL 9110 (VOIDmode, 9111 gen_rtvec (2, PATTERN (anchor), 9112 PATTERN (floater))), 9113 anchor); 9114 9115 JUMP_LABEL (temp) = JUMP_LABEL (anchor); 9116 SET_INSN_DELETED (anchor); 9117 9118 /* Emit a special USE insn for FLOATER, then delete 9119 the floating insn. */ 9120 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); 9121 delete_insn (floater); 9122 continue; 9123 } 9124 } 9125 } 9126} 9127 9128static int 9129pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater, 9130 int reversed, rtx dest, 9131 rtx src1, rtx src2) 9132{ 9133 int insn_code_number; 9134 rtx_insn *start, *end; 9135 9136 /* Create a PARALLEL with the patterns of ANCHOR and 9137 FLOATER, try to recognize it, then test constraints 9138 for the resulting pattern. 9139 9140 If the pattern doesn't match or the constraints 9141 aren't met keep searching for a suitable floater 9142 insn. */ 9143 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor); 9144 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater); 9145 INSN_CODE (new_rtx) = -1; 9146 insn_code_number = recog_memoized (new_rtx); 9147 basic_block bb = BLOCK_FOR_INSN (anchor); 9148 if (insn_code_number < 0 9149 || (extract_insn (new_rtx), 9150 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb)))) 9151 return 0; 9152 9153 if (reversed) 9154 { 9155 start = anchor; 9156 end = floater; 9157 } 9158 else 9159 { 9160 start = floater; 9161 end = anchor; 9162 } 9163 9164 /* There's up to three operands to consider. One 9165 output and two inputs. 9166 9167 The output must not be used between FLOATER & ANCHOR 9168 exclusive. The inputs must not be set between 9169 FLOATER and ANCHOR exclusive. */ 9170 9171 if (reg_used_between_p (dest, start, end)) 9172 return 0; 9173 9174 if (reg_set_between_p (src1, start, end)) 9175 return 0; 9176 9177 if (reg_set_between_p (src2, start, end)) 9178 return 0; 9179 9180 /* If we get here, then everything is good. */ 9181 return 1; 9182} 9183 9184/* Return nonzero if references for INSN are delayed. 9185 9186 Millicode insns are actually function calls with some special 9187 constraints on arguments and register usage. 9188 9189 Millicode calls always expect their arguments in the integer argument 9190 registers, and always return their result in %r29 (ret1). They 9191 are expected to clobber their arguments, %r1, %r29, and the return 9192 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else. 9193 9194 This function tells reorg that the references to arguments and 9195 millicode calls do not appear to happen until after the millicode call. 9196 This allows reorg to put insns which set the argument registers into the 9197 delay slot of the millicode call -- thus they act more like traditional 9198 CALL_INSNs. 9199 9200 Note we cannot consider side effects of the insn to be delayed because 9201 the branch and link insn will clobber the return pointer. If we happened 9202 to use the return pointer in the delay slot of the call, then we lose. 9203 9204 get_attr_type will try to recognize the given insn, so make sure to 9205 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns 9206 in particular. 
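   For example, reorg may move "ldi 10,%r26" into the delay slot of a
   following "bl $$mulI,%r31" (millicode name and register illustrative):
   because the reference to %r26 is treated as delayed until after the
   call, setting up the argument in the delay slot is safe.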
*/ 9207int 9208pa_insn_refs_are_delayed (rtx_insn *insn) 9209{ 9210 return ((NONJUMP_INSN_P (insn) 9211 && GET_CODE (PATTERN (insn)) != SEQUENCE 9212 && GET_CODE (PATTERN (insn)) != USE 9213 && GET_CODE (PATTERN (insn)) != CLOBBER 9214 && get_attr_type (insn) == TYPE_MILLI)); 9215} 9216 9217/* Promote the return value, but not the arguments. */ 9218 9219static machine_mode 9220pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, 9221 machine_mode mode, 9222 int *punsignedp ATTRIBUTE_UNUSED, 9223 const_tree fntype ATTRIBUTE_UNUSED, 9224 int for_return) 9225{ 9226 if (for_return == 0) 9227 return mode; 9228 return promote_mode (type, mode, punsignedp); 9229} 9230 9231/* On the HP-PA the value is found in register(s) 28(-29), unless 9232 the mode is SF or DF. Then the value is returned in fr4 (32). 9233 9234 This must perform the same promotions as PROMOTE_MODE, else promoting 9235 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly. 9236 9237 Small structures must be returned in a PARALLEL on PA64 in order 9238 to match the HP Compiler ABI. */ 9239 9240static rtx 9241pa_function_value (const_tree valtype, 9242 const_tree func ATTRIBUTE_UNUSED, 9243 bool outgoing ATTRIBUTE_UNUSED) 9244{ 9245 machine_mode valmode; 9246 9247 if (AGGREGATE_TYPE_P (valtype) 9248 || TREE_CODE (valtype) == COMPLEX_TYPE 9249 || TREE_CODE (valtype) == VECTOR_TYPE) 9250 { 9251 HOST_WIDE_INT valsize = int_size_in_bytes (valtype); 9252 9253 /* Handle aggregates that fit exactly in a word or double word. */ 9254 if ((valsize & (UNITS_PER_WORD - 1)) == 0) 9255 return gen_rtx_REG (TYPE_MODE (valtype), 28); 9256 9257 if (TARGET_64BIT) 9258 { 9259 /* Aggregates with a size less than or equal to 128 bits are 9260 returned in GR 28(-29). They are left justified. The pad 9261 bits are undefined. Larger aggregates are returned in 9262 memory. */ 9263 rtx loc[2]; 9264 int i, offset = 0; 9265 int ub = valsize <= UNITS_PER_WORD ? 1 : 2; 9266 9267 for (i = 0; i < ub; i++) 9268 { 9269 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 9270 gen_rtx_REG (DImode, 28 + i), 9271 GEN_INT (offset)); 9272 offset += 8; 9273 } 9274 9275 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); 9276 } 9277 else if (valsize > UNITS_PER_WORD) 9278 { 9279 /* Aggregates 5 to 8 bytes in size are returned in general 9280 registers r28-r29 in the same manner as other non 9281 floating-point objects. The data is right-justified and 9282 zero-extended to 64 bits. This is opposite to the normal 9283 justification used on big endian targets and requires 9284 special treatment. */ 9285 rtx loc = gen_rtx_EXPR_LIST (VOIDmode, 9286 gen_rtx_REG (DImode, 28), const0_rtx); 9287 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc)); 9288 } 9289 } 9290 9291 if ((INTEGRAL_TYPE_P (valtype) 9292 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD) 9293 || POINTER_TYPE_P (valtype)) 9294 valmode = word_mode; 9295 else 9296 valmode = TYPE_MODE (valtype); 9297 9298 if (TREE_CODE (valtype) == REAL_TYPE 9299 && !AGGREGATE_TYPE_P (valtype) 9300 && TYPE_MODE (valtype) != TFmode 9301 && !TARGET_SOFT_FLOAT) 9302 return gen_rtx_REG (valmode, 32); 9303 9304 return gen_rtx_REG (valmode, 28); 9305} 9306 9307/* Implement the TARGET_LIBCALL_VALUE hook. */ 9308 9309static rtx 9310pa_libcall_value (machine_mode mode, 9311 const_rtx fun ATTRIBUTE_UNUSED) 9312{ 9313 if (! 
TARGET_SOFT_FLOAT 9314 && (mode == SFmode || mode == DFmode)) 9315 return gen_rtx_REG (mode, 32); 9316 else 9317 return gen_rtx_REG (mode, 28); 9318} 9319 9320/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */ 9321 9322static bool 9323pa_function_value_regno_p (const unsigned int regno) 9324{ 9325 if (regno == 28 9326 || (! TARGET_SOFT_FLOAT && regno == 32)) 9327 return true; 9328 9329 return false; 9330} 9331 9332/* Update the data in CUM to advance over an argument 9333 of mode MODE and data type TYPE. 9334 (TYPE is null for libcalls where that information may not be available.) */ 9335 9336static void 9337pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 9338 const_tree type, bool named ATTRIBUTE_UNUSED) 9339{ 9340 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 9341 int arg_size = FUNCTION_ARG_SIZE (mode, type); 9342 9343 cum->nargs_prototype--; 9344 cum->words += (arg_size 9345 + ((cum->words & 01) 9346 && type != NULL_TREE 9347 && arg_size > 1)); 9348} 9349 9350/* Return the location of a parameter that is passed in a register or NULL 9351 if the parameter has any component that is passed in memory. 9352 9353 This is new code and will be pushed to into the net sources after 9354 further testing. 9355 9356 ??? We might want to restructure this so that it looks more like other 9357 ports. */ 9358static rtx 9359pa_function_arg (cumulative_args_t cum_v, machine_mode mode, 9360 const_tree type, bool named ATTRIBUTE_UNUSED) 9361{ 9362 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 9363 int max_arg_words = (TARGET_64BIT ? 8 : 4); 9364 int alignment = 0; 9365 int arg_size; 9366 int fpr_reg_base; 9367 int gpr_reg_base; 9368 rtx retval; 9369 9370 if (mode == VOIDmode) 9371 return NULL_RTX; 9372 9373 arg_size = FUNCTION_ARG_SIZE (mode, type); 9374 9375 /* If this arg would be passed partially or totally on the stack, then 9376 this routine should return zero. pa_arg_partial_bytes will 9377 handle arguments which are split between regs and stack slots if 9378 the ABI mandates split arguments. */ 9379 if (!TARGET_64BIT) 9380 { 9381 /* The 32-bit ABI does not split arguments. */ 9382 if (cum->words + arg_size > max_arg_words) 9383 return NULL_RTX; 9384 } 9385 else 9386 { 9387 if (arg_size > 1) 9388 alignment = cum->words & 1; 9389 if (cum->words + alignment >= max_arg_words) 9390 return NULL_RTX; 9391 } 9392 9393 /* The 32bit ABIs and the 64bit ABIs are rather different, 9394 particularly in their handling of FP registers. We might 9395 be able to cleverly share code between them, but I'm not 9396 going to bother in the hope that splitting them up results 9397 in code that is more easily understood. */ 9398 9399 if (TARGET_64BIT) 9400 { 9401 /* Advance the base registers to their current locations. 9402 9403 Remember, gprs grow towards smaller register numbers while 9404 fprs grow to higher register numbers. Also remember that 9405 although FP regs are 32-bit addressable, we pretend that 9406 the registers are 64-bits wide. */ 9407 gpr_reg_base = 26 - cum->words; 9408 fpr_reg_base = 32 + cum->words; 9409 9410 /* Arguments wider than one word and small aggregates need special 9411 treatment. */ 9412 if (arg_size > 1 9413 || mode == BLKmode 9414 || (type && (AGGREGATE_TYPE_P (type) 9415 || TREE_CODE (type) == COMPLEX_TYPE 9416 || TREE_CODE (type) == VECTOR_TYPE))) 9417 { 9418 /* Double-extended precision (80-bit), quad-precision (128-bit) 9419 and aggregates including complex numbers are aligned on 9420 128-bit boundaries. 
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
		 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

	 Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
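/* Worked example (illustrative only): a DFmode argument in the second
   slot pair of an indirect call assembled with HP as on the 32-bit
   runtime has cum->words nonzero, so gpr_reg_base is 23 and
   fpr_reg_base is 38, and the dual-location PARALLEL built above is

     (parallel:DF [(expr_list (reg:DF 38) (const_int 0))
		   (expr_list (reg:DF 23) (const_int 0))])

   i.e. the caller places the value in both %fr7 and the %r23/%r24
   pair, and the callee may take it from either location.  */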
/* Arguments larger than one word are double word aligned.  */

static unsigned int
pa_function_arg_boundary (machine_mode mode, const_tree type)
{
  bool singleword = (type
		     ? (integer_zerop (TYPE_SIZE (type))
			|| !TREE_CONSTANT (TYPE_SIZE (type))
			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);

  return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
}

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
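/* Worked example (illustrative only): on TARGET_64BIT with cum->words
   == 6, a 32-byte aggregate (FUNCTION_ARG_SIZE == 4) neither fits in
   the two remaining register slots nor starts on the stack, so

     (8 - 6 - 0) * UNITS_PER_WORD = 16

   bytes are passed in registers and the remaining 16 bytes go on the
   stack.  */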
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.c will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
	  || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
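/* Section-choice examples for the hook above (illustrative only,
   assuming TARGET_SOM and !flag_pic):

     static const char msg[] = "hi";		read-only, no reloc
       => readonly_data_section ($TEXT$/$LIT$)

     const char *const p = msg;			initializer needs a reloc
       => data_section

     one-only (COMDAT) read-only data that is not weak
       => som_one_only_readonly_data_section  */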
/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, we always force an RTX needing relocation to
     the data section.  */
  return 3;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
	  || int_size_in_bytes (type) <= 0);
}
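/* Illustrative examples (not from the original source) of the rule
   above on a 32-bit target:

     struct { int a, b; }	 8 bytes   => returned in registers
     struct { int a, b, c; }	12 bytes   => returned in memory
     variably sized or empty types	   => returned in memory

   On TARGET_64BIT the register/memory threshold is 16 bytes.  */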
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
}
#endif

/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (machine_mode from, machine_mode to,
			     enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
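/* Illustrative queries (not from the original source) against the
   predicate above:

     SFmode -> SImode in GENERAL_REGS  => false (same size, change OK)
     SFmode -> DFmode in FP_REGS	   => true  (size change in FP regs)
     SImode -> DImode in GENERAL_REGS  => true on 32-bit targets, since
					  DImode is wider than a word  */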
/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}


/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from the text segment to a stack location
   and then patched by pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw 36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
	fputs ("\tdepi 0,31,2,%r21\n", f);
      else
	fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw 4(%r21),%r19\n", f);
      fputs ("\tldw 0(%r21),%r21\n", f);
      if (TARGET_PA_20)
	{
	  fputs ("\tbve (%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	  fputs ("\t.word 0\n", f);
	  fputs ("\t.word 0\n", f);
	}
      else
	{
	  fputs ("\tldsid (%r21),%r1\n", f);
	  fputs ("\tmtsp %r1,%sr0\n", f);
	  fputs ("\tbe 0(%sr0,%r21)\n", f);
	  fputs ("\tldw 40(%r22),%r29\n", f);
	}
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
      fputs ("\t.word 0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia %r31\n", f);
      fputs ("\tldd 24(%r31),%r1\n", f);
      fputs ("\tldd 24(%r1),%r27\n", f);
      fputs ("\tldd 16(%r1),%r1\n", f);
      fputs ("\tbve (%r1)\n", f);
      fputs ("\tldd 32(%r31),%r31\n", f);
      fputs ("\t.dword 0 ; fptr\n", f);
      fputs ("\t.dword 0 ; static link\n", f);
    }
}
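/* Memory-layout sketch (illustrative only) of the 32-bit trampoline
   after pa_trampoline_init has patched it:

     offsets  0-35   instruction template (plus padding)
     offset   36     address of the target function
     offset   40     static chain value (loaded into %r29)
     offset   44     address of the trampoline itself
     offset   48     global pointer %r19

   The words at offsets 44 and 48 form the plabel through which the
   trampoline is actually called.  */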
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to trampoline template at offset 40.
   Move the trampoline address to trampoline template at offset 44.
   Move r19 to trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
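/* Worked example (illustrative only) of the flush-range computation
   above, assuming MIN_CACHELINE_SIZE of 32 and a 32-bit PA 2.0
   trampoline (TRAMPOLINE_CODE_SIZE 32) at address 0x7f001234:

     start_addr = 0x7f001234 & -32	       = 0x7f001220
     end_addr   = (0x7f001234 + 32 - 1) & -32  = 0x7f001240

   so the fdc/fic loops step from 0x7f001220 to 0x7f001240 in 32-byte
   cache-line increments, covering the whole code block.  */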
/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.
   The adjustment below is 46 rather than 44 because the low-order
   bit marking the address as a plabel is also set.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

static rtx
pa_internal_arg_pointer (void)
{
  /* The argument pointer and the hard frame pointer are the same in
     the 32-bit runtime, so we don't need a copy.  */
  if (TARGET_64BIT)
    return copy_to_reg (virtual_incoming_args_rtx);
  else
    return virtual_incoming_args_rtx;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.  */

static bool
pa_can_eliminate (const int from, const int to)
{
  /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
  if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
    return false;

  return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
pa_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
      && to == STACK_POINTER_REGNUM)
    offset = -pa_compute_frame_size (get_frame_size (), 0);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}

/* Target hook for c_mode_for_suffix.  */

static machine_mode
pa_c_mode_for_suffix (char suffix)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      if (suffix == 'q')
	return TFmode;
    }

  return VOIDmode;
}
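/* Usage sketch (illustrative only): on HP-UX configurations with the
   long-double library, the hook above maps the 'q' literal suffix to
   TFmode, so

     long double pi = 3.14159265358979323846q;

   is parsed directly as a 128-bit (TFmode) constant.  On other
   configurations the hook returns VOIDmode and the suffix is not
   accepted.  */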
/* Target hook for function_section.  */

static section *
pa_function_section (tree decl, enum node_frequency freq,
		     bool startup, bool exit)
{
  /* Put functions in text section if target doesn't have named sections.  */
  if (!targetm_common.have_named_sections)
    return text_section;

  /* Force nested functions into the same section as the containing
     function.  */
  if (decl
      && DECL_SECTION_NAME (decl) == NULL
      && DECL_CONTEXT (decl) != NULL_TREE
      && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
      && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
    return function_section (DECL_CONTEXT (decl));

  /* Otherwise, use the default function section.  */
  return default_function_section (decl, freq, startup, exit);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (INTVAL (x)))
    return false;

  if (function_label_operand (x, mode))
    return false;

  return true;
}
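/* Illustrative queries (not from the original source) against the
   predicate above on a TARGET_64BIT configuration:

     CONST0_RTX (DFmode)	 => true  (floating-point zero is allowed)
     other CONST_DOUBLE values	 => false (forced to memory)
     a CONST_INT before reload	 => false unless the value is cheap to
				    construct per the checks above  */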
/* Implement TARGET_SECTION_TYPE_FLAGS.  */

static unsigned int
pa_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags;

  flags = default_section_type_flags (decl, name, reloc);

  /* Function labels are placed in the constant pool.  This can
     cause a section conflict if decls are put in ".data.rel.ro"
     or ".data.rel.ro.local" using the __attribute__ construct.  */
  if (strcmp (name, ".data.rel.ro") == 0
      || strcmp (name, ".data.rel.ro.local") == 0)
    flags |= SECTION_WRITE | SECTION_RELRO;

  return flags;
}

/* pa_legitimate_address_p recognizes an RTL expression that is a
   valid memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
   REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
   available with floating point loads and stores, and integer loads.
   We get better code by allowing indexed addresses in the initial
   RTL generation.

   The acceptance of indexed addresses as legitimate implies that we
   must provide patterns for doing indexed integer stores, or the move
   expanders must force the address of an indexed store to a register.
   We have adopted the latter approach.

   Another function of pa_legitimate_address_p is to ensure that
   the base register is a valid pointer for indexed instructions.
   On targets that have non-equivalent space registers, we have to
   know at the time of assembler output which register in a REG+REG
   pair is the base register.  The REG_POINTER flag is sometimes lost
   in reload and the following passes, so it can't be relied on during
   code generation.  Thus, we either have to canonicalize the order
   of the registers in REG+REG indexed addresses, or treat REG+REG
   addresses separately and provide patterns for both permutations.

   The latter approach requires several hundred additional lines of
   code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed memory
   operand.  As we won't need to canonicalize the operands if the
   REG_POINTER lossage can be fixed, it seems better to canonicalize.

   We initially break out scaled indexed addresses in canonical order
   in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
   scaled indexed addresses during RTL generation.  However, fold_rtx
   has its own opinion on how the operands of a PLUS should be ordered.
   If one of the operands is equivalent to a constant, it will make
   that operand the second operand.  As the base register is likely to
   be equivalent to a SYMBOL_REF, we have made it the second operand.

   pa_legitimate_address_p accepts REG+REG as legitimate when the
   operands are in the order INDEX+BASE on targets with non-equivalent
   space registers, and in any order on targets with equivalent space
   registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.

   We treat a SYMBOL_REF as legitimate if it is part of the current
   function's constant pool, because such addresses can actually be
   output as REG+SMALLINT.  */

static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      if (GET_CODE (index) == CONST_INT)
	{
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  return base14_operand (index, mode);
	}

      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}

/* Look for machine dependent ways to make the invalid address AD a
   valid address.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This makes reload inheritance and reload_cse work better since Z
   can be reused.

   There may be more opportunities to improve code with this hook.  */

rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  return NULL_RTX;
}
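/* Worked example (illustrative only): for memory (R + 0x12345) in
   SImode, mask is 0x3fff, so

     offset & mask	  = 0x2345, which is >= 0x2000, so round up
     newoffset		  = 0x10000 + 0x4000 = 0x14000
     offset - newoffset	  = -0x1cbb, which fits in 14 bits

   and the address is rewritten as ((R + 0x14000) + -0x1cbb), with the
   inner PLUS reloaded into a register that reload can then reuse.  */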
/* Output address vector.  */

void
pa_output_addr_vec (rtx lab, rtx body)
{
  int idx, vlen = XVECLEN (body, 0);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* Output address difference vector.  */

void
pa_output_addr_diff_vec (rtx lab, rtx body)
{
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

  targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
  if (TARGET_GAS)
    fputs ("\t.begin_brtab\n", asm_out_file);
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }
  if (TARGET_GAS)
    fputs ("\t.end_brtab\n", asm_out_file);
}

/* This is a helper function for the other atomic operations.  This function
   emits a loop that contains SEQ that iterates until a compare-and-swap
   operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
   a set of instructions that takes a value from OLD_REG as an input and
   produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
   set to the current contents of MEM.  After SEQ, a compare-and-swap will
   attempt to update MEM with NEW_REG.  The function returns true when the
   loop was generated successfully.  */

static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
	if (!success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label, 0);
  return true;
}
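/* Usage sketch (illustrative only, not a pattern taken from this file):
   an atomic-OR expander could be built on the helper above roughly as

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg, seq;
     start_sequence ();
     new_reg = expand_simple_binop (mode, IOR, old_reg, val,
				    NULL_RTX, 1, OPTAB_LIB_WIDEN);
     seq = get_insns ();
     end_sequence ();
     pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   i.e. SEQ computes NEW_REG from OLD_REG and the loop retries the
   compare-and-swap until the update wins.  */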
/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
	target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
	return target;
    }

  return NULL_RTX;
}

#include "gt-pa.h"