/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2015 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "insn-codes.h"
#include "optabs.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "df.h"
#include "params.h"
#include "cfgloop.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tm-constrs.h"

/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;
  const int ddbr;
  const int debr;
  const int dlgr;
  const int dlr;
  const int dr;
  const int dsgfr;
  const int dsgr;
};

const struct processor_costs *s390_cost;

static const
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),    /* M     */
  COSTS_N_INSNS (10),   /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (4),    /* MHI   */
  COSTS_N_INSNS (5),    /* ML    */
  COSTS_N_INSNS (5),    /* MR    */
  COSTS_N_INSNS (4),    /* MS    */
  COSTS_N_INSNS (15),   /* MSG   */
  COSTS_N_INSNS (7),    /* MSGF  */
  COSTS_N_INSNS (7),    /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (7),    /* multiplication in DFmode */
  COSTS_N_INSNS (13),   /* MXBR */
  COSTS_N_INSNS (136),  /* SQXBR */
  COSTS_N_INSNS (44),   /* SQDBR */
  COSTS_N_INSNS (35),   /* SQEBR */
  COSTS_N_INSNS (18),   /* MADBR */
  COSTS_N_INSNS (13),   /* MAEBR */
  COSTS_N_INSNS (134),  /* DXBR */
  COSTS_N_INSNS (30),   /* DDBR */
  COSTS_N_INSNS (27),   /* DEBR */
  COSTS_N_INSNS (220),  /* DLGR */
  COSTS_N_INSNS (34),   /* DLR */
  COSTS_N_INSNS (34),   /* DR */
  COSTS_N_INSNS (32),   /* DSGFR */
  COSTS_N_INSNS (32),   /* DSGR */
};

static const
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),    /* M     */
  COSTS_N_INSNS (2),    /* MGHI  */
  COSTS_N_INSNS (2),    /* MH    */
  COSTS_N_INSNS (2),    /* MHI   */
  COSTS_N_INSNS (4),    /* ML    */
  COSTS_N_INSNS (4),    /* MR    */
  COSTS_N_INSNS (5),    /* MS    */
  COSTS_N_INSNS (6),    /* MSG   */
  COSTS_N_INSNS (4),    /* MSGF  */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR */
  COSTS_N_INSNS (40),   /* DDBR */
  COSTS_N_INSNS (26),   /* DEBR */
  COSTS_N_INSNS (176),  /* DLGR */
  COSTS_N_INSNS (31),   /* DLR */
  COSTS_N_INSNS (31),   /* DR */
  COSTS_N_INSNS (31),   /* DSGFR */
  COSTS_N_INSNS (31),   /* DSGR */
};

static const
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),    /* M     */
  COSTS_N_INSNS (2),    /* MGHI  */
  COSTS_N_INSNS (2),    /* MH    */
  COSTS_N_INSNS (2),    /* MHI   */
  COSTS_N_INSNS (4),    /* ML    */
  COSTS_N_INSNS (4),    /* MR    */
  COSTS_N_INSNS (5),    /* MS    */
  COSTS_N_INSNS (6),    /* MSG   */
  COSTS_N_INSNS (4),    /* MSGF  */
  COSTS_N_INSNS (4),    /* MSGFR */
  COSTS_N_INSNS (4),    /* MSGR  */
  COSTS_N_INSNS (4),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (28),   /* MXBR */
  COSTS_N_INSNS (130),  /* SQXBR */
  COSTS_N_INSNS (66),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (60),   /* DXBR */
  COSTS_N_INSNS (40),   /* DDBR */
  COSTS_N_INSNS (26),   /* DEBR */
  COSTS_N_INSNS (30),   /* DLGR */
  COSTS_N_INSNS (23),   /* DLR */
  COSTS_N_INSNS (23),   /* DR */
  COSTS_N_INSNS (24),   /* DSGFR */
  COSTS_N_INSNS (24),   /* DSGR */
};

static const
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),   /* M     */
  COSTS_N_INSNS (10),   /* MGHI  */
  COSTS_N_INSNS (10),   /* MH    */
  COSTS_N_INSNS (10),   /* MHI   */
  COSTS_N_INSNS (10),   /* ML    */
  COSTS_N_INSNS (10),   /* MR    */
  COSTS_N_INSNS (10),   /* MS    */
  COSTS_N_INSNS (10),   /* MSG   */
  COSTS_N_INSNS (10),   /* MSGF  */
  COSTS_N_INSNS (10),   /* MSGFR */
  COSTS_N_INSNS (10),   /* MSGR  */
  COSTS_N_INSNS (10),   /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (50),   /* MXBR */
  COSTS_N_INSNS (120),  /* SQXBR */
  COSTS_N_INSNS (52),   /* SQDBR */
  COSTS_N_INSNS (38),   /* SQEBR */
  COSTS_N_INSNS (1),    /* MADBR */
  COSTS_N_INSNS (1),    /* MAEBR */
  COSTS_N_INSNS (111),  /* DXBR */
  COSTS_N_INSNS (39),   /* DDBR */
  COSTS_N_INSNS (32),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR */
  COSTS_N_INSNS (71),   /* DLR */
  COSTS_N_INSNS (71),   /* DR */
  COSTS_N_INSNS (71),   /* DSGFR */
  COSTS_N_INSNS (71),   /* DSGR */
};

static const
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),    /* M     */
  COSTS_N_INSNS (5),    /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (5),    /* MHI   */
  COSTS_N_INSNS (7),    /* ML    */
  COSTS_N_INSNS (7),    /* MR    */
  COSTS_N_INSNS (6),    /* MS    */
  COSTS_N_INSNS (8),    /* MSG   */
  COSTS_N_INSNS (6),    /* MSGF  */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR  */
  COSTS_N_INSNS (6),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (101),  /* DXBR B+101 */
  COSTS_N_INSNS (29),   /* DDBR */
  COSTS_N_INSNS (22),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};

static const
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),    /* M     */
  COSTS_N_INSNS (5),    /* MGHI  */
  COSTS_N_INSNS (5),    /* MH    */
  COSTS_N_INSNS (5),    /* MHI   */
  COSTS_N_INSNS (7),    /* ML    */
  COSTS_N_INSNS (7),    /* MR    */
  COSTS_N_INSNS (6),    /* MS    */
  COSTS_N_INSNS (8),    /* MSG   */
  COSTS_N_INSNS (6),    /* MSGF  */
  COSTS_N_INSNS (6),    /* MSGFR */
  COSTS_N_INSNS (8),    /* MSGR  */
  COSTS_N_INSNS (6),    /* MSR   */
  COSTS_N_INSNS (1),    /* multiplication in DFmode */
  COSTS_N_INSNS (40),   /* MXBR B+40 */
  COSTS_N_INSNS (100),  /* SQXBR B+100 */
  COSTS_N_INSNS (42),   /* SQDBR B+42 */
  COSTS_N_INSNS (28),   /* SQEBR B+28 */
  COSTS_N_INSNS (1),    /* MADBR B */
  COSTS_N_INSNS (1),    /* MAEBR B */
  COSTS_N_INSNS (131),  /* DXBR B+131 */
  COSTS_N_INSNS (29),   /* DDBR */
  COSTS_N_INSNS (22),   /* DEBR */
  COSTS_N_INSNS (160),  /* DLGR cracked */
  COSTS_N_INSNS (160),  /* DLR cracked */
  COSTS_N_INSNS (160),  /* DR expanded */
  COSTS_N_INSNS (160),  /* DSGFR cracked */
  COSTS_N_INSNS (160),  /* DSGR cracked */
};
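/* For illustration (not part of the original source): with s390_cost
   pointing at z990_cost, a 64-bit multiply (MSG) is accounted as
   COSTS_N_INSNS (6), i.e. six times the cost of a simple instruction.
   Which table s390_cost points at is selected per CPU model; the
   values are consumed by the backend's rtx cost computation, which is
   not part of this excerpt.  */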
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 3
static int last_scheduled_unit_distance[MAX_SCHED_UNITS];

/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 8

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 100

/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
        base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */

struct s390_address
{
  rtx base;
  rtx indx;
  rtx disp;
  bool pointer;
  bool literal_pool;
};
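/* Example (illustrative): the memory operand of "lg %r1,40(%r3,%r2)"
   decomposes into base = %r2, indx = %r3 and disp = (const_int 40);
   in "lg %r1,0(%r2)" only the base component is present.  */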
/* The following structure is embedded in the machine
   specific part of struct function.  */

struct GTY (()) s390_frame_layout
{
  /* Offset within stack frame.  */
  HOST_WIDE_INT gprs_offset;
  HOST_WIDE_INT f0_offset;
  HOST_WIDE_INT f4_offset;
  HOST_WIDE_INT f8_offset;
  HOST_WIDE_INT backchain_offset;

  /* Number of the first and last gpr for which slots in the register
     save area are reserved.  */
  int first_save_gpr_slot;
  int last_save_gpr_slot;

  /* Location (FP register number) where GPRs (r0-r15) should
     be saved to.
      0 - does not need to be saved at all
     -1 - stack slot  */
#define SAVE_SLOT_NONE   0
#define SAVE_SLOT_STACK -1
  signed char gpr_save_slots[16];

  /* Number of first and last gpr to be saved, restored.  */
  int first_save_gpr;
  int first_restore_gpr;
  int last_save_gpr;
  int last_restore_gpr;

  /* Bits standing for floating point registers.  Set, if the
     respective register has to be saved.  Starting with reg 16 (f0)
     at the rightmost bit.
     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
  unsigned int fpr_bitmap;

  /* Number of floating point registers f8-f15 which must be saved.  */
  int high_fprs;

  /* Set if return address needs to be saved.
     This flag is set by s390_return_addr_rtx if it could not use
     the initial value of r14 and therefore depends on r14 saved
     to the stack.  */
  bool save_return_addr_p;

  /* Size of stack frame.  */
  HOST_WIDE_INT frame_size;
};

/* Define the structure for the machine field in struct function.  */

struct GTY(()) machine_function
{
  struct s390_frame_layout frame_layout;

  /* Literal pool base register.  */
  rtx base_reg;

  /* True if we may need to perform branch splitting.  */
  bool split_branches_pending_p;

  bool has_landing_pad_p;

  /* True if the current function may contain a tbegin clobbering
     FPRs.  */
  bool tbegin_p;
};

/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT                           \
                                 ? cfun_frame_layout.fpr_bitmap & 0x0f  \
                                 : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
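/* For illustration: hard reg 16 (f0) maps to bit 0, so
   cfun_set_fpr_save (FPR0_REGNUM) sets the rightmost bit of
   fpr_bitmap.  The masks 0x0f and 0x03 in cfun_save_arg_fprs_p cover
   bits 0-3 resp. bits 0-1, which per the fpr_bitmap layout above are
   exactly the FP argument registers f0, f2, f4, f6 of the 64-bit ABI
   resp. f0, f2 of the 31-bit ABI.  */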
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8

/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE)                            \
  (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))

/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)


/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */

static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (s390_vector_abi)
    return;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
         only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
        return;

      /* In arguments vector types > 16 are passed as before (GCC
         never enforced the bigger alignment for arguments which was
         required by the old vector ABI).  However, it might still be
         ABI relevant due to the changed alignment if it is a struct
         member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
        return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
         natural alignment there will never be ABI dependent padding
         in an array type.  That's why we do not set in_struct_p to
         true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
           arg_chain;
           arg_chain = TREE_CHAIN (arg_chain))
        s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
        }
    }
}
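/* Example (illustrative): given
     typedef double v4df __attribute__ ((vector_size (32)));
   a v4df struct member falls under aspect 1 above: it is 8-byte
   aligned with the new vector ABI but naturally aligned with the old
   one, so encountering it sets the marker even outside argument
   context.  The marker value records which ABI was actually used:
   2 when vector registers are in use (TARGET_VX_ABI), 1 otherwise.  */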
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_MAX +
                        S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)

#include "s390-builtins.def"
  CODE_FOR_nothing
};

static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
                                       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;
  unsigned int bflags_mask = (BFLAGS_MASK_INIT);

  bflags_mask |= (TARGET_VX)  ? B_VX  : 0;
  bflags_mask |= (TARGET_HTM) ? B_HTM : 0;

  /* The uint64_type_node from tree.c is not compatible with the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)          \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))        \
    s390_builtin_types[INDEX] = (!CONST_P) ?            \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)             \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
    s390_builtin_types[INDEX] =                                 \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)            \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                \
    s390_builtin_types[INDEX] =                                 \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)            \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                        \
    s390_builtin_types[INDEX] =                                         \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)     \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))                        \
    s390_builtin_types[INDEX] =                                         \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, BFLAGS, args...)             \
  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))        \
    s390_builtin_fn_types[INDEX] =                      \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)            \
  if (((BFLAGS) & ~bflags_mask) == 0)                                   \
    s390_builtin_decls[S390_BUILTIN_##NAME] =                           \
      add_builtin_function ("__builtin_" #NAME,                         \
                            s390_builtin_fn_types[FNTYPE],              \
                            S390_BUILTIN_##NAME,                        \
                            BUILT_IN_MD,                                \
                            NULL,                                       \
                            ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)     \
  if (((BFLAGS) & ~bflags_mask) == 0)                                   \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME,                         \
                            s390_builtin_fn_types[FNTYPE],              \
                            S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
                            BUILT_IN_MD,                                \
                            NULL,                                       \
                            0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"

}
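/* For illustration (the .def entry shown is only a sketch, not a
   verbatim quote of s390-builtins.def): an entry such as
     B_DEF (tbegin, tbegin, returns_twice_attr, B_HTM, ..., BT_FN_...)
   expands through the B_DEF definition above into an
   add_builtin_function call registering __builtin_tbegin, provided
   all of its bflags (here B_HTM) are enabled in bflags_mask.  */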
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_U1];

      if (!tree_fits_uhwi_p (arg)
          || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
        {
          error("constant argument %d for builtin %qF is out of range (0.."
                HOST_WIDE_INT_PRINT_UNSIGNED ")",
                argnum, decl,
                ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
          return false;
        }
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
          || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
          || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
        {
          error("constant argument %d for builtin %qF is out of range ("
                HOST_WIDE_INT_PRINT_DEC ".."
                HOST_WIDE_INT_PRINT_DEC ")",
                argnum, decl,
                -((HOST_WIDE_INT)1 << (bitwidth - 1)),
                ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
          return false;
        }
    }
  return true;
}
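/* For illustration: an unsigned operand flag selecting bitwidth 4
   from the table above accepts the constants 0..15; a signed flag
   selecting bitwidth 8 accepts -128..127.  Values outside the range
   (or non-constants) are diagnosed with the errors above.  */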
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 5

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "s390_expand_builtin, code = %4d, %s\n",
               (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
    }

  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    {
      gcc_unreachable ();
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
         saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
        cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("Unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
         arguments but an element selector.  So we have to also look
         at the vector return type when emitting the modulo
         operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
        last_vec_mode = insn_data[icode].operand[0].mode;
    }

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
        return NULL_RTX;
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      if (O_IMM_P (op_flags)
          && TREE_CODE (arg) != INTEGER_CST)
        {
          error ("constant value required for builtin %qF argument %d",
                 fndecl, arity + 1);
          return const0_rtx;
        }

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
        return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
         is "convenient".  However, our checks below rely on this
         being done.  */
      if (CONST_INT_P (op[arity])
          && SCALAR_INT_MODE_P (insn_op->mode)
          && GET_MODE (op[arity]) != insn_op->mode)
        op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
                                                 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
         the proper mode.  This allows us to use e.g. (match_operand
         "memory_operand"..) in the insn patterns instead of (mem
         (match_operand "address_operand)).  This is helpful for
         patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
          && insn_op->predicate != address_operand)
        op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
        {
          gcc_assert (last_vec_mode != VOIDmode);
          op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
                                             op[arity],
                                             GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
                                             NULL_RTX, 1, OPTAB_DIRECT);
        }

      /* Record the vector mode used for an element selector.  This assumes:
         1. There is no builtin with two different vector modes and an element selector
         2. The element selector comes after the vector type it is referring to.
         This is currently true for all the builtins but FIXME we
         should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
        last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
        {
          arity++;
          continue;
        }

      if (MEM_P (op[arity])
          && insn_op->predicate == memory_operand
          && (GET_MODE (XEXP (op[arity], 0)) == Pmode
              || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
        {
          op[arity] = replace_equiv_address (op[arity],
                                             copy_to_mode_reg (Pmode,
                                                               XEXP (op[arity], 0)));
        }
      else if (GET_MODE (op[arity]) == insn_op->mode
               || GET_MODE (op[arity]) == VOIDmode
               || (insn_op->predicate == address_operand
                   && GET_MODE (op[arity]) == Pmode))
        {
          /* An address_operand usually has VOIDmode in the expander
             so we cannot use this.  */
          machine_mode target_mode =
            (insn_op->predicate == address_operand
             ? Pmode : insn_op->mode);
          op[arity] = copy_to_mode_reg (target_mode, op[arity]);
        }

      if (!insn_op->predicate (op[arity], insn_op->mode))
        {
          error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
          return const0_rtx;
        }
      arity++;
    }

  if (last_vec_mode != VOIDmode && !TARGET_VX)
    {
      error ("Vector type builtin %qF is not supported without -mvx "
             "(default with -march=z13).",
             fndecl);
      return const0_rtx;
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
        pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
        pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;

/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
                                int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
           || wi::gtu_p (expr, s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
           || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
           || wi::gtu_p (expr2, s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
             " non-negative integer constants or too large (max. %d)", name,
             s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
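/* Usage example (illustrative):
     void foo (void) __attribute__ ((hotpatch (1, 2)));
   requests one halfword of padding before and two halfwords after
   foo's label, overriding any -mhotpatch= command line setting for
   this function.  Both values must be constants not exceeding
   s390_hotpatch_hw_max.  */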
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE
         || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
    default: break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
  /* End element.  */
  { NULL,        0, 0, false, false, false, NULL, false }
};

/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
int
s390_label_align (rtx label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels_log;
}

static machine_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static machine_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (machine_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
    case SFmode:
    case DFmode:
    case TFmode:
      return true;
    default:
      return false;
    }
}
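/* For illustration: V4SImode (four SImode elements, 16 bytes) passes
   the checks above and is supported when TARGET_VX is set, while
   V4DImode (32 bytes) exceeds the 16-byte vector size and is
   rejected.  */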
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
          || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
        return m2;
      return VOIDmode;

    case CCSmode:
    case CCUmode:
    case CCTmode:
    case CCSRmode:
    case CCURmode:
    case CCZ1mode:
      if (m2 == CCZmode)
        return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
  return VOIDmode;
}

/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return 1;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCSmode:
    case CCSRmode:
    case CCUmode:
    case CCURmode:
    case CCLmode:
    case CCL1mode:
    case CCL2mode:
    case CCL3mode:
    case CCT1mode:
    case CCT2mode:
    case CCT3mode:
    case CCVEQmode:
    case CCVHmode:
    case CCVHUmode:
    case CCVFHmode:
    case CCVFHEmode:
      if (req_mode != set_mode)
        return 0;
      break;

    case CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
          && req_mode != CCSRmode && req_mode != CCURmode)
        return 0;
      break;

    case CCAPmode:
    case CCANmode:
      if (req_mode != CCAmode)
        return 0;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
        rtx set = XVECEXP (PATTERN (insn), 0, i);
        if (GET_CODE (set) == SET)
          if (!s390_match_ccmode_set (set, req_mode))
            return false;
      }

  return true;
}

/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_DOUBLE as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
        return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}

/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode))
    {
      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
         s390_emit_compare or s390_canonicalize_comparison will take
         care of it.  */
      switch (code)
        {
        case EQ:
        case NE:
          return CCVEQmode;
        case GT:
        case UNLE:
          return CCVFHmode;
        case GE:
        case UNLT:
          return CCVFHEmode;
        default:
          ;
        }
    }

  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
        return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
           || GET_CODE (op1) == NEG)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCLmode;

      if (GET_CODE (op0) == AND)
        {
          /* Check whether we can potentially do it via TM.  */
          machine_mode ccmode;
          ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
          if (ccmode != VOIDmode)
            {
              /* Relax CCTmode to CCZmode to allow fall-back to AND
                 if that turns out to be beneficial.  */
              return ccmode == CCTmode ? CCZmode : ccmode;
            }
        }

      if (register_operand (op0, HImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
        return CCT3mode;
      if (register_operand (op0, QImode)
          && GET_CODE (op1) == CONST_INT
          && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
        return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
         INT_MIN (i.e. -INT_MAX - 1) is used as parameter, which stays
         negative.  So we have an overflow from a positive value to a
         negative.  Using CCAP mode the resulting cc can be used for
         comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
         the resulting cc for comparisons with zero.  Knowing the sign of the
         constant, the overflow behavior gets predictable.  e.g.:
           int a, b; if ((b = a + c) > 0)
         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
          && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
              || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
                  /* Avoid INT32_MIN on 32 bit.  */
                  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
        {
          if (INTVAL (XEXP (op0, 1)) < 0)
            return CCANmode;
          else
            return CCAPmode;
        }
      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
          && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
          && GET_CODE (op1) != CONST_INT)
        return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}

/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
          && pos >= 0 && pos + len <= modesize
          && modesize <= HOST_BITS_PER_WIDE_INT)
        {
          unsigned HOST_WIDE_INT block;
          block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
          block <<= modesize - pos - len;

          *op0 = gen_rtx_AND (GET_MODE (inner), inner,
                              gen_int_mode (block, GET_MODE (inner)));
        }
    }
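  /* Example for the conversion above (illustrative): for an SImode
     INNER (modesize 32) with LEN 8 and POS 8, BLOCK becomes
     ((1 << 8) - 1) << (32 - 8 - 8) = 0x00ff0000, i.e. the equality
     test against zero becomes (and:SI inner 0x00ff0000) == 0, which
     the TM patterns can handle.  */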
  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
          && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
          && (GET_MODE_SIZE (GET_MODE (inner))
              >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
          && ((INTVAL (mask)
               & GET_MODE_MASK (GET_MODE (inner))
               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
              == 0))
        inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
        {
          int part = s390_single_part (XEXP (*op0, 1),
                                       GET_MODE (inner), QImode, 0);
          if (part >= 0)
            {
              mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
              inner = adjust_address_nv (inner, QImode, part);
              *op0 = gen_rtx_AND (QImode, inner, mask);
            }
        }
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
          & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
        {
        case EQ: new_code = EQ;  break;
        case NE: new_code = NE;  break;
        case LT: new_code = GTU; break;
        case GT: new_code = LTU; break;
        case LE: new_code = GEU; break;
        case GE: new_code = LEU; break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          *op0 = XVECEXP (*op0, 0, 0);
          *code = new_code;
        }
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
        {
        case CCZmode:
        case CCRAWmode:
          switch (*code)
            {
            case EQ: new_code = EQ; break;
            case NE: new_code = NE; break;
            default: break;
            }
          break;
        default: break;
        }

      if (new_code != UNKNOWN)
        {
          /* For CCRAWmode put the required cc mask into the second
             operand.  */
          if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
              && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
            *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
          *op0 = XVECEXP (*op0, 0, 0);
          *code = new_code;
        }
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
          || (*code == NE && GET_CODE (*op0) == EQ))
        *code = EQ;
      else
        *code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }

  /* Using the scalar variants of vector instructions for 64 bit FP
     comparisons might require swapping the operands.  */
  if (TARGET_VX
      && register_operand (*op0, DFmode)
      && register_operand (*op1, DFmode)
      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
    {
      rtx tmp;

      switch (*code)
        {
        case LT:   *code = GT;   break;
        case LE:   *code = GE;   break;
        case UNGT: *code = UNLE; break;
        case UNGE: *code = UNLT; break;
        default: ;
        }
      tmp = *op0; *op0 = *op1; *op1 = tmp;
    }
}

/* Helper function for s390_emit_compare.  If possible emit a 64 bit
   FP compare using the single element variant of vector instructions.
   Replace CODE with the comparison code to be used in the CC reg
   compare and return the condition code register RTX in CC.  */

static bool
s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
                                rtx *cc)
{
  machine_mode cmp_mode;
  bool swap_p = false;

  switch (*code)
    {
    case EQ:   cmp_mode = CCVEQmode; break;
    case NE:   cmp_mode = CCVEQmode; break;
    case GT:   cmp_mode = CCVFHmode; break;
    case GE:   cmp_mode = CCVFHEmode; break;
    case UNLE: cmp_mode = CCVFHmode; break;
    case UNLT: cmp_mode = CCVFHEmode; break;
    case LT:   cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
    case LE:   cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
    case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
    default: return false;
    }

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }
  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
               gen_rtvec (2,
                          gen_rtx_SET (VOIDmode, *cc,
                                       gen_rtx_COMPARE (cmp_mode, cmp1,
                                                        cmp2)),
                          gen_rtx_CLOBBER (VOIDmode,
                                           gen_rtx_SCRATCH (V2DImode)))));
  return true;
}


/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  if (TARGET_VX
      && register_operand (op0, DFmode)
      && register_operand (op1, DFmode)
      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
    {
      /* Work has been done by s390_expand_vec_compare_scalar already.  */
    }
  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
         compare_and_swap pattern already computed the result and the
         machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
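/* Usage sketch (illustrative): a conditional branch testing A > B
   would typically be emitted as
     cond = s390_emit_compare (GT, a, b);
     s390_emit_jump (label, cond);
   with s390_emit_jump as defined below.  */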
*/ 1773 1774static rtx 1775s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, 1776 rtx cmp, rtx new_rtx) 1777{ 1778 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx)); 1779 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), 1780 const0_rtx); 1781} 1782 1783/* Emit a jump instruction to TARGET and return it. If COND is 1784 NULL_RTX, emit an unconditional jump, else a conditional jump under 1785 condition COND. */ 1786 1787rtx_insn * 1788s390_emit_jump (rtx target, rtx cond) 1789{ 1790 rtx insn; 1791 1792 target = gen_rtx_LABEL_REF (VOIDmode, target); 1793 if (cond) 1794 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx); 1795 1796 insn = gen_rtx_SET (VOIDmode, pc_rtx, target); 1797 return emit_jump_insn (insn); 1798} 1799 1800/* Return branch condition mask to implement a branch 1801 specified by CODE. Return -1 for invalid comparisons. */ 1802 1803int 1804s390_branch_condition_mask (rtx code) 1805{ 1806 const int CC0 = 1 << 3; 1807 const int CC1 = 1 << 2; 1808 const int CC2 = 1 << 1; 1809 const int CC3 = 1 << 0; 1810 1811 gcc_assert (GET_CODE (XEXP (code, 0)) == REG); 1812 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM); 1813 gcc_assert (XEXP (code, 1) == const0_rtx 1814 || (GET_MODE (XEXP (code, 0)) == CCRAWmode 1815 && CONST_INT_P (XEXP (code, 1)))); 1816 1817 1818 switch (GET_MODE (XEXP (code, 0))) 1819 { 1820 case CCZmode: 1821 case CCZ1mode: 1822 switch (GET_CODE (code)) 1823 { 1824 case EQ: return CC0; 1825 case NE: return CC1 | CC2 | CC3; 1826 default: return -1; 1827 } 1828 break; 1829 1830 case CCT1mode: 1831 switch (GET_CODE (code)) 1832 { 1833 case EQ: return CC1; 1834 case NE: return CC0 | CC2 | CC3; 1835 default: return -1; 1836 } 1837 break; 1838 1839 case CCT2mode: 1840 switch (GET_CODE (code)) 1841 { 1842 case EQ: return CC2; 1843 case NE: return CC0 | CC1 | CC3; 1844 default: return -1; 1845 } 1846 break; 1847 1848 case CCT3mode: 1849 switch (GET_CODE (code)) 1850 { 1851 case EQ: return CC3; 1852 case NE: return CC0 | CC1 | CC2; 1853 default: return -1; 1854 } 1855 break; 1856 1857 case CCLmode: 1858 switch (GET_CODE (code)) 1859 { 1860 case EQ: return CC0 | CC2; 1861 case NE: return CC1 | CC3; 1862 default: return -1; 1863 } 1864 break; 1865 1866 case CCL1mode: 1867 switch (GET_CODE (code)) 1868 { 1869 case LTU: return CC2 | CC3; /* carry */ 1870 case GEU: return CC0 | CC1; /* no carry */ 1871 default: return -1; 1872 } 1873 break; 1874 1875 case CCL2mode: 1876 switch (GET_CODE (code)) 1877 { 1878 case GTU: return CC0 | CC1; /* borrow */ 1879 case LEU: return CC2 | CC3; /* no borrow */ 1880 default: return -1; 1881 } 1882 break; 1883 1884 case CCL3mode: 1885 switch (GET_CODE (code)) 1886 { 1887 case EQ: return CC0 | CC2; 1888 case NE: return CC1 | CC3; 1889 case LTU: return CC1; 1890 case GTU: return CC3; 1891 case LEU: return CC1 | CC2; 1892 case GEU: return CC2 | CC3; 1893 default: return -1; 1894 } 1895 1896 case CCUmode: 1897 switch (GET_CODE (code)) 1898 { 1899 case EQ: return CC0; 1900 case NE: return CC1 | CC2 | CC3; 1901 case LTU: return CC1; 1902 case GTU: return CC2; 1903 case LEU: return CC0 | CC1; 1904 case GEU: return CC0 | CC2; 1905 default: return -1; 1906 } 1907 break; 1908 1909 case CCURmode: 1910 switch (GET_CODE (code)) 1911 { 1912 case EQ: return CC0; 1913 case NE: return CC2 | CC1 | CC3; 1914 case LTU: return CC2; 1915 case GTU: return CC1; 1916 case LEU: return CC0 | CC2; 1917 case GEU: return CC0 | CC1; 1918 default: return -1; 1919 } 1920 break; 1921 1922 case CCAPmode: 1923 
switch (GET_CODE (code)) 1924 { 1925 case EQ: return CC0; 1926 case NE: return CC1 | CC2 | CC3; 1927 case LT: return CC1 | CC3; 1928 case GT: return CC2; 1929 case LE: return CC0 | CC1 | CC3; 1930 case GE: return CC0 | CC2; 1931 default: return -1; 1932 } 1933 break; 1934 1935 case CCANmode: 1936 switch (GET_CODE (code)) 1937 { 1938 case EQ: return CC0; 1939 case NE: return CC1 | CC2 | CC3; 1940 case LT: return CC1; 1941 case GT: return CC2 | CC3; 1942 case LE: return CC0 | CC1; 1943 case GE: return CC0 | CC2 | CC3; 1944 default: return -1; 1945 } 1946 break; 1947 1948 case CCSmode: 1949 switch (GET_CODE (code)) 1950 { 1951 case EQ: return CC0; 1952 case NE: return CC1 | CC2 | CC3; 1953 case LT: return CC1; 1954 case GT: return CC2; 1955 case LE: return CC0 | CC1; 1956 case GE: return CC0 | CC2; 1957 case UNORDERED: return CC3; 1958 case ORDERED: return CC0 | CC1 | CC2; 1959 case UNEQ: return CC0 | CC3; 1960 case UNLT: return CC1 | CC3; 1961 case UNGT: return CC2 | CC3; 1962 case UNLE: return CC0 | CC1 | CC3; 1963 case UNGE: return CC0 | CC2 | CC3; 1964 case LTGT: return CC1 | CC2; 1965 default: return -1; 1966 } 1967 break; 1968 1969 case CCSRmode: 1970 switch (GET_CODE (code)) 1971 { 1972 case EQ: return CC0; 1973 case NE: return CC2 | CC1 | CC3; 1974 case LT: return CC2; 1975 case GT: return CC1; 1976 case LE: return CC0 | CC2; 1977 case GE: return CC0 | CC1; 1978 case UNORDERED: return CC3; 1979 case ORDERED: return CC0 | CC2 | CC1; 1980 case UNEQ: return CC0 | CC3; 1981 case UNLT: return CC2 | CC3; 1982 case UNGT: return CC1 | CC3; 1983 case UNLE: return CC0 | CC2 | CC3; 1984 case UNGE: return CC0 | CC1 | CC3; 1985 case LTGT: return CC2 | CC1; 1986 default: return -1; 1987 } 1988 break; 1989 1990 /* Vector comparison modes. */ 1991 1992 case CCVEQmode: 1993 switch (GET_CODE (code)) 1994 { 1995 case EQ: return CC0; 1996 case NE: return CC3; 1997 default: return -1; 1998 } 1999 2000 case CCVEQANYmode: 2001 switch (GET_CODE (code)) 2002 { 2003 case EQ: return CC0 | CC1; 2004 case NE: return CC3 | CC1; 2005 default: return -1; 2006 } 2007 2008 /* Integer vector compare modes. */ 2009 2010 case CCVHmode: 2011 switch (GET_CODE (code)) 2012 { 2013 case GT: return CC0; 2014 case LE: return CC3; 2015 default: return -1; 2016 } 2017 2018 case CCVHANYmode: 2019 switch (GET_CODE (code)) 2020 { 2021 case GT: return CC0 | CC1; 2022 case LE: return CC3 | CC1; 2023 default: return -1; 2024 } 2025 2026 case CCVHUmode: 2027 switch (GET_CODE (code)) 2028 { 2029 case GTU: return CC0; 2030 case LEU: return CC3; 2031 default: return -1; 2032 } 2033 2034 case CCVHUANYmode: 2035 switch (GET_CODE (code)) 2036 { 2037 case GTU: return CC0 | CC1; 2038 case LEU: return CC3 | CC1; 2039 default: return -1; 2040 } 2041 2042 /* FP vector compare modes. 
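Note the pairings below: the complement of GT is UNLE and the complement of GE is UNLT, since a comparison involving a NaN yields false and therefore ends up in the CC3 mask.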
*/ 2043 2044 case CCVFHmode: 2045 switch (GET_CODE (code)) 2046 { 2047 case GT: return CC0; 2048 case UNLE: return CC3; 2049 default: return -1; 2050 } 2051 2052 case CCVFHANYmode: 2053 switch (GET_CODE (code)) 2054 { 2055 case GT: return CC0 | CC1; 2056 case UNLE: return CC3 | CC1; 2057 default: return -1; 2058 } 2059 2060 case CCVFHEmode: 2061 switch (GET_CODE (code)) 2062 { 2063 case GE: return CC0; 2064 case UNLT: return CC3; 2065 default: return -1; 2066 } 2067 2068 case CCVFHEANYmode: 2069 switch (GET_CODE (code)) 2070 { 2071 case GE: return CC0 | CC1; 2072 case UNLT: return CC3 | CC1; 2073 default: return -1; 2074 } 2075 2076 2077 case CCRAWmode: 2078 switch (GET_CODE (code)) 2079 { 2080 case EQ: 2081 return INTVAL (XEXP (code, 1)); 2082 case NE: 2083 return (INTVAL (XEXP (code, 1))) ^ 0xf; 2084 default: 2085 gcc_unreachable (); 2086 } 2087 2088 default: 2089 return -1; 2090 } 2091} 2092 2093 2094/* Return branch condition mask to implement a compare and branch 2095 specified by CODE. Return -1 for invalid comparisons. */ 2096 2097int 2098s390_compare_and_branch_condition_mask (rtx code) 2099{ 2100 const int CC0 = 1 << 3; 2101 const int CC1 = 1 << 2; 2102 const int CC2 = 1 << 1; 2103 2104 switch (GET_CODE (code)) 2105 { 2106 case EQ: 2107 return CC0; 2108 case NE: 2109 return CC1 | CC2; 2110 case LT: 2111 case LTU: 2112 return CC1; 2113 case GT: 2114 case GTU: 2115 return CC2; 2116 case LE: 2117 case LEU: 2118 return CC0 | CC1; 2119 case GE: 2120 case GEU: 2121 return CC0 | CC2; 2122 default: 2123 gcc_unreachable (); 2124 } 2125 return -1; 2126} 2127 2128/* If INV is false, return assembler mnemonic string to implement 2129 a branch specified by CODE. If INV is true, return mnemonic 2130 for the corresponding inverted branch. */ 2131 2132static const char * 2133s390_branch_condition_mnemonic (rtx code, int inv) 2134{ 2135 int mask; 2136 2137 static const char *const mnemonic[16] = 2138 { 2139 NULL, "o", "h", "nle", 2140 "l", "nhe", "lh", "ne", 2141 "e", "nlh", "he", "nl", 2142 "le", "nh", "no", NULL 2143 }; 2144 2145 if (GET_CODE (XEXP (code, 0)) == REG 2146 && REGNO (XEXP (code, 0)) == CC_REGNUM 2147 && (XEXP (code, 1) == const0_rtx 2148 || (GET_MODE (XEXP (code, 0)) == CCRAWmode 2149 && CONST_INT_P (XEXP (code, 1))))) 2150 mask = s390_branch_condition_mask (code); 2151 else 2152 mask = s390_compare_and_branch_condition_mask (code); 2153 2154 gcc_assert (mask >= 0); 2155 2156 if (inv) 2157 mask ^= 15; 2158 2159 gcc_assert (mask >= 1 && mask <= 14); 2160 2161 return mnemonic[mask]; 2162} 2163 2164/* Return the part of op which has a value different from def. 2165 The size of the part is determined by mode. 2166 Use this function only if you already know that op really 2167 contains such a part. */ 2168 2169unsigned HOST_WIDE_INT 2170s390_extract_part (rtx op, machine_mode mode, int def) 2171{ 2172 unsigned HOST_WIDE_INT value = 0; 2173 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode); 2174 int part_bits = GET_MODE_BITSIZE (mode); 2175 unsigned HOST_WIDE_INT part_mask 2176 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1; 2177 int i; 2178 2179 for (i = 0; i < max_parts; i++) 2180 { 2181 if (i == 0) 2182 value = (unsigned HOST_WIDE_INT) INTVAL (op); 2183 else 2184 value >>= part_bits; 2185 2186 if ((value & part_mask) != (def & part_mask)) 2187 return value & part_mask; 2188 } 2189 2190 gcc_unreachable (); 2191} 2192 2193/* If OP is an integer constant of mode MODE with exactly one 2194 part of mode PART_MODE unequal to DEF, return the number of that 2195 part. 
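(For example, with 64-bit HOST_WIDE_INT: OP = 0x0000ffff00000000 in DImode with PART_MODE == HImode and DEF == 0 has exactly one nonzero HImode part, and the value returned is 1, parts being counted from the most significant end.)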
Otherwise, return -1. */ 2196 2197 int 2198 s390_single_part (rtx op, 2199 machine_mode mode, 2200 machine_mode part_mode, 2201 int def) 2202 { 2203 unsigned HOST_WIDE_INT value = 0; 2204 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode); 2205 unsigned HOST_WIDE_INT part_mask 2206 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1; 2207 int i, part = -1; 2208 2209 if (GET_CODE (op) != CONST_INT) 2210 return -1; 2211 2212 for (i = 0; i < n_parts; i++) 2213 { 2214 if (i == 0) 2215 value = (unsigned HOST_WIDE_INT) INTVAL (op); 2216 else 2217 value >>= GET_MODE_BITSIZE (part_mode); 2218 2219 if ((value & part_mask) != (def & part_mask)) 2220 { 2221 if (part != -1) 2222 return -1; 2223 else 2224 part = i; 2225 } 2226 } 2227 return part == -1 ? -1 : n_parts - 1 - part; 2228 } 2229 2230 /* Return true if IN contains a contiguous bitfield in the lower SIZE 2231 bits and no other bits are set in IN. POS and LENGTH can be used 2232 to obtain the start position and the length of the bitfield. 2233 2234 POS gives the position of the first bit of the bitfield counting 2235 from the lowest order bit starting with zero. In order to use this 2236 value for S/390 instructions this has to be converted to "bits big 2237 endian" style. */ 2238 2239 bool 2240 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size, 2241 int *pos, int *length) 2242 { 2243 int tmp_pos = 0; 2244 int tmp_length = 0; 2245 int i; 2246 unsigned HOST_WIDE_INT mask = 1ULL; 2247 bool contiguous = false; 2248 2249 for (i = 0; i < size; mask <<= 1, i++) 2250 { 2251 if (contiguous) 2252 { 2253 if (mask & in) 2254 tmp_length++; 2255 else 2256 break; 2257 } 2258 else 2259 { 2260 if (mask & in) 2261 { 2262 contiguous = true; 2263 tmp_length++; 2264 } 2265 else 2266 tmp_pos++; 2267 } 2268 } 2269 2270 if (!tmp_length) 2271 return false; 2272 2273 /* Calculate a mask for all bits beyond the contiguous bits. */ 2274 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1)); 2275 2276 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT) 2277 mask &= (HOST_WIDE_INT_1U << size) - 1; 2278 2279 if (mask & in) 2280 return false; 2281 2282 if (tmp_length + tmp_pos - 1 > size) 2283 return false; 2284 2285 if (length) 2286 *length = tmp_length; 2287 2288 if (pos) 2289 *pos = tmp_pos; 2290 2291 return true; 2292 } 2293 2294 bool 2295 s390_const_vec_duplicate_p (rtx op) 2296 { 2297 if (!VECTOR_MODE_P (GET_MODE (op)) 2298 || GET_CODE (op) != CONST_VECTOR 2299 || !CONST_INT_P (XVECEXP (op, 0, 0))) 2300 return false; 2301 2302 if (GET_MODE_NUNITS (GET_MODE (op)) > 1) 2303 { 2304 int i; 2305 2306 for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) 2307 if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) 2308 return false; 2309 } 2310 return true; 2311 } 2312 /* Return true if OP contains the same contiguous bitfield in *all* 2313 its elements. START and END can be used to obtain the start and 2314 end position of the bitfield. 2315 2316 START/END give the position of the first/last bit of the bitfield 2317 counting from the lowest order bit starting with zero. In order to 2318 use these values for S/390 instructions they have to be converted to 2319 "bits big endian" style. */ 2320 2321 bool 2322 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end) 2323 { 2324 unsigned HOST_WIDE_INT mask; 2325 int length, size; 2326 2327 if (!s390_const_vec_duplicate_p (op)) 2328 return false; 2329 2330 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op)); 2331 mask = UINTVAL (XVECEXP (op, 0, 0)); 2332 if (s390_contiguous_bitmask_p (mask, size, start, 2333 end != NULL ? &length : NULL)) 2334 { 2335 if (end != NULL) 2336 *end = *start + length - 1; 2337 return true; 2338 } 2339 /* 0xff00000f style immediates can be covered by swapping start and 2340 end indices in vgm. */ 2341 if (s390_contiguous_bitmask_p (~mask, size, start, 2342 end != NULL ? &length : NULL)) 2343 { 2344 if (end != NULL) 2345 *end = *start - 1; 2346 if (start != NULL) 2347 *start = *start + length; 2348 return true; 2349 } 2350 return false; 2351 } 2352 2353 /* Return true if OP consists only of byte chunks that are either 0 or 2354 0xff. If MASK is != NULL, a byte mask is generated which is 2355 appropriate for the vector generate byte mask instruction. */ 2356 2357 bool 2358 s390_bytemask_vector_p (rtx op, unsigned *mask) 2359 { 2360 int i; 2361 unsigned tmp_mask = 0; 2362 int nunit, unit_size; 2363 2364 if (!VECTOR_MODE_P (GET_MODE (op)) 2365 || GET_CODE (op) != CONST_VECTOR 2366 || !CONST_INT_P (XVECEXP (op, 0, 0))) 2367 return false; 2368 2369 nunit = GET_MODE_NUNITS (GET_MODE (op)); 2370 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op)); 2371 2372 for (i = 0; i < nunit; i++) 2373 { 2374 unsigned HOST_WIDE_INT c; 2375 int j; 2376 2377 if (!CONST_INT_P (XVECEXP (op, 0, i))) 2378 return false; 2379 2380 c = UINTVAL (XVECEXP (op, 0, i)); 2381 for (j = 0; j < unit_size; j++) 2382 { 2383 if ((c & 0xff) != 0 && (c & 0xff) != 0xff) 2384 return false; 2385 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j); 2386 c = c >> BITS_PER_UNIT; 2387 } 2388 } 2389 2390 if (mask != NULL) 2391 *mask = tmp_mask; 2392 2393 return true; 2394 } 2395 2396 /* Check whether a rotate of ROTL followed by an AND of CONTIG is 2397 equivalent to a shift followed by the AND. In particular, CONTIG 2398 should not overlap the (rotated) bit 0/bit 63 gap. Negative values 2399 for ROTL indicate a rotate to the right. */ 2400 2401 bool 2402 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig) 2403 { 2404 int pos, len; 2405 bool ok; 2406 2407 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len); 2408 gcc_assert (ok); 2409 2410 return ((rotl >= 0 && rotl <= pos) 2411 || (rotl < 0 && -rotl <= bitsize - len - pos)); 2412 } 2413 2414 /* Check whether we can (and want to) split a double-word 2415 move in mode MODE from SRC to DST into two single-word 2416 moves, moving the subword FIRST_SUBWORD first. */ 2417 2418 bool 2419 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword) 2420 { 2421 /* Floating point and vector registers cannot be split. */ 2422 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst)) 2423 return false; 2424 2425 /* We don't need to split if operands are directly accessible. */ 2426 if (s_operand (src, mode) || s_operand (dst, mode)) 2427 return false; 2428 2429 /* Non-offsettable memory references cannot be split. */ 2430 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src)) 2431 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst))) 2432 return false; 2433 2434 /* Moving the first subword must not clobber a register 2435 needed to move the second subword.
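(For instance -- a hypothetical 31-bit case -- splitting a move from (mem:DI (reg:SI %r3)) into the register pair %r2/%r3 must move subword 0 (%r2) first: moving subword 1 first would overwrite %r3, the base register still needed for the other half.)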
*/ 2436 if (register_operand (dst, mode)) 2437 { 2438 rtx subreg = operand_subword (dst, first_subword, 0, mode); 2439 if (reg_overlap_mentioned_p (subreg, src)) 2440 return false; 2441 } 2442 2443 return true; 2444} 2445 2446/* Return true if it can be proven that [MEM1, MEM1 + SIZE] 2447 and [MEM2, MEM2 + SIZE] do overlap and false 2448 otherwise. */ 2449 2450bool 2451s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size) 2452{ 2453 rtx addr1, addr2, addr_delta; 2454 HOST_WIDE_INT delta; 2455 2456 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM) 2457 return true; 2458 2459 if (size == 0) 2460 return false; 2461 2462 addr1 = XEXP (mem1, 0); 2463 addr2 = XEXP (mem2, 0); 2464 2465 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1); 2466 2467 /* This overlapping check is used by peepholes merging memory block operations. 2468 Overlapping operations would otherwise be recognized by the S/390 hardware 2469 and would fall back to a slower implementation. Allowing overlapping 2470 operations would lead to slow code but not to wrong code. Therefore we are 2471 somewhat optimistic if we cannot prove that the memory blocks are 2472 overlapping. 2473 That's why we return false here although this may accept operations on 2474 overlapping memory areas. */ 2475 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT) 2476 return false; 2477 2478 delta = INTVAL (addr_delta); 2479 2480 if (delta == 0 2481 || (delta > 0 && delta < size) 2482 || (delta < 0 && -delta < size)) 2483 return true; 2484 2485 return false; 2486} 2487 2488/* Check whether the address of memory reference MEM2 equals exactly 2489 the address of memory reference MEM1 plus DELTA. Return true if 2490 we can prove this to be the case, false otherwise. */ 2491 2492bool 2493s390_offset_p (rtx mem1, rtx mem2, rtx delta) 2494{ 2495 rtx addr1, addr2, addr_delta; 2496 2497 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM) 2498 return false; 2499 2500 addr1 = XEXP (mem1, 0); 2501 addr2 = XEXP (mem2, 0); 2502 2503 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1); 2504 if (!addr_delta || !rtx_equal_p (addr_delta, delta)) 2505 return false; 2506 2507 return true; 2508} 2509 2510/* Expand logical operator CODE in mode MODE with operands OPERANDS. */ 2511 2512void 2513s390_expand_logical_operator (enum rtx_code code, machine_mode mode, 2514 rtx *operands) 2515{ 2516 machine_mode wmode = mode; 2517 rtx dst = operands[0]; 2518 rtx src1 = operands[1]; 2519 rtx src2 = operands[2]; 2520 rtx op, clob, tem; 2521 2522 /* If we cannot handle the operation directly, use a temp register. */ 2523 if (!s390_logical_operator_ok_p (operands)) 2524 dst = gen_reg_rtx (mode); 2525 2526 /* QImode and HImode patterns make sense only if we have a destination 2527 in memory. Otherwise perform the operation in SImode. */ 2528 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM) 2529 wmode = SImode; 2530 2531 /* Widen operands if required. 
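For example, a QImode AND of two registers is performed as an SImode AND on (paradoxical) subregs of the operands; the low byte of the result is copied back into the original destination afterwards.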
*/ 2532 if (mode != wmode) 2533 { 2534 if (GET_CODE (dst) == SUBREG 2535 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0) 2536 dst = tem; 2537 else if (REG_P (dst)) 2538 dst = gen_rtx_SUBREG (wmode, dst, 0); 2539 else 2540 dst = gen_reg_rtx (wmode); 2541 2542 if (GET_CODE (src1) == SUBREG 2543 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0) 2544 src1 = tem; 2545 else if (GET_MODE (src1) != VOIDmode) 2546 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0); 2547 2548 if (GET_CODE (src2) == SUBREG 2549 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0) 2550 src2 = tem; 2551 else if (GET_MODE (src2) != VOIDmode) 2552 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0); 2553 } 2554 2555 /* Emit the instruction. */ 2556 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2)); 2557 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 2558 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 2559 2560 /* Fix up the destination if needed. */ 2561 if (dst != operands[0]) 2562 emit_move_insn (operands[0], gen_lowpart (mode, dst)); 2563} 2564 2565/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */ 2566 2567bool 2568s390_logical_operator_ok_p (rtx *operands) 2569{ 2570 /* If the destination operand is in memory, it needs to coincide 2571 with one of the source operands. After reload, it has to be 2572 the first source operand. */ 2573 if (GET_CODE (operands[0]) == MEM) 2574 return rtx_equal_p (operands[0], operands[1]) 2575 || (!reload_completed && rtx_equal_p (operands[0], operands[2])); 2576 2577 return true; 2578} 2579 2580/* Narrow logical operation CODE of memory operand MEMOP with immediate 2581 operand IMMOP to switch from SS to SI type instructions. */ 2582 2583void 2584s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop) 2585{ 2586 int def = code == AND ? -1 : 0; 2587 HOST_WIDE_INT mask; 2588 int part; 2589 2590 gcc_assert (GET_CODE (*memop) == MEM); 2591 gcc_assert (!MEM_VOLATILE_P (*memop)); 2592 2593 mask = s390_extract_part (*immop, QImode, def); 2594 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def); 2595 gcc_assert (part >= 0); 2596 2597 *memop = adjust_address (*memop, QImode, part); 2598 *immop = gen_int_mode (mask, QImode); 2599} 2600 2601 2602/* How to allocate a 'struct machine_function'. */ 2603 2604static struct machine_function * 2605s390_init_machine_status (void) 2606{ 2607 return ggc_cleared_alloc<machine_function> (); 2608} 2609 2610/* Map for smallest class containing reg regno. */ 2611 2612const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] = 2613{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */ 2614 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */ 2615 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */ 2616 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */ 2617 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */ 2618 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */ 2619 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */ 2620 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */ 2621 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */ 2622 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */ 2623 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */ 2624 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */ 2625 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */ 2626 VEC_REGS, VEC_REGS /* 52 */ 2627}; 2628 2629/* Return attribute type of insn. 
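Returns TYPE_NONE for anything recog cannot identify (e.g. asm insns), so callers need not guard against unrecognizable insns themselves.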
*/ 2630 2631 static enum attr_type 2632 s390_safe_attr_type (rtx_insn *insn) 2633 { 2634 if (recog_memoized (insn) >= 0) 2635 return get_attr_type (insn); 2636 else 2637 return TYPE_NONE; 2638 } 2639 2640 /* Return true if DISP is a valid short displacement. */ 2641 2642 static bool 2643 s390_short_displacement (rtx disp) 2644 { 2645 /* No displacement is OK. */ 2646 if (!disp) 2647 return true; 2648 2649 /* Without the long displacement facility we don't need to 2650 distinguish between long and short displacements. */ 2651 if (!TARGET_LONG_DISPLACEMENT) 2652 return true; 2653 2654 /* Integer displacement in range. */ 2655 if (GET_CODE (disp) == CONST_INT) 2656 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096; 2657 2658 /* GOT offset is not OK, the GOT can be large. */ 2659 if (GET_CODE (disp) == CONST 2660 && GET_CODE (XEXP (disp, 0)) == UNSPEC 2661 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT 2662 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF)) 2663 return false; 2664 2665 /* All other symbolic constants are literal pool references, 2666 which are OK as the literal pool must be small. */ 2667 if (GET_CODE (disp) == CONST) 2668 return true; 2669 2670 return false; 2671 } 2672 2673 /* Decompose an RTL expression ADDR for a memory address into 2674 its components, returned in OUT. 2675 2676 Returns false if ADDR is not a valid memory address, true 2677 otherwise. If OUT is NULL, don't return the components, 2678 but check for validity only. 2679 2680 Note: Only addresses in canonical form are recognized. 2681 LEGITIMIZE_ADDRESS should convert non-canonical forms to the 2682 canonical form so that they will be recognized. */ 2683 2684 static int 2685 s390_decompose_address (rtx addr, struct s390_address *out) 2686 { 2687 HOST_WIDE_INT offset = 0; 2688 rtx base = NULL_RTX; 2689 rtx indx = NULL_RTX; 2690 rtx disp = NULL_RTX; 2691 rtx orig_disp; 2692 bool pointer = false; 2693 bool base_ptr = false; 2694 bool indx_ptr = false; 2695 bool literal_pool = false; 2696 2697 /* We may need to substitute the literal pool base register into the address 2698 below. However, at this point we do not know which register is going to 2699 be used as base, so we substitute the arg pointer register. This is going 2700 to be treated as holding a pointer below -- it shouldn't be used for any 2701 other purpose. */ 2702 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM); 2703 2704 /* Decompose address into base + index + displacement. */ 2705 2706 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC) 2707 base = addr; 2708 2709 else if (GET_CODE (addr) == PLUS) 2710 { 2711 rtx op0 = XEXP (addr, 0); 2712 rtx op1 = XEXP (addr, 1); 2713 enum rtx_code code0 = GET_CODE (op0); 2714 enum rtx_code code1 = GET_CODE (op1); 2715 2716 if (code0 == REG || code0 == UNSPEC) 2717 { 2718 if (code1 == REG || code1 == UNSPEC) 2719 { 2720 indx = op0; /* index + base */ 2721 base = op1; 2722 } 2723 2724 else 2725 { 2726 base = op0; /* base + displacement */ 2727 disp = op1; 2728 } 2729 } 2730 2731 else if (code0 == PLUS) 2732 { 2733 indx = XEXP (op0, 0); /* index + base + disp */ 2734 base = XEXP (op0, 1); 2735 disp = op1; 2736 } 2737 2738 else 2739 { 2740 return false; 2741 } 2742 } 2743 2744 else 2745 disp = addr; /* displacement */ 2746 2747 /* Extract integer part of displacement.
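E.g. (const (plus (symbol_ref "x") (const_int 8))) is split into disp = (symbol_ref "x") and offset = 8, while a plain (const_int 8) simply becomes offset = 8.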
*/ 2748 orig_disp = disp; 2749 if (disp) 2750 { 2751 if (GET_CODE (disp) == CONST_INT) 2752 { 2753 offset = INTVAL (disp); 2754 disp = NULL_RTX; 2755 } 2756 else if (GET_CODE (disp) == CONST 2757 && GET_CODE (XEXP (disp, 0)) == PLUS 2758 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 2759 { 2760 offset = INTVAL (XEXP (XEXP (disp, 0), 1)); 2761 disp = XEXP (XEXP (disp, 0), 0); 2762 } 2763 } 2764 2765 /* Strip off CONST here to avoid special case tests later. */ 2766 if (disp && GET_CODE (disp) == CONST) 2767 disp = XEXP (disp, 0); 2768 2769 /* We can convert literal pool addresses to 2770 displacements by basing them off the base register. */ 2771 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp)) 2772 { 2773 /* Either base or index must be free to hold the base register. */ 2774 if (!base) 2775 base = fake_pool_base, literal_pool = true; 2776 else if (!indx) 2777 indx = fake_pool_base, literal_pool = true; 2778 else 2779 return false; 2780 2781 /* Mark up the displacement. */ 2782 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp), 2783 UNSPEC_LTREL_OFFSET); 2784 } 2785 2786 /* Validate base register. */ 2787 if (base) 2788 { 2789 if (GET_CODE (base) == UNSPEC) 2790 switch (XINT (base, 1)) 2791 { 2792 case UNSPEC_LTREF: 2793 if (!disp) 2794 disp = gen_rtx_UNSPEC (Pmode, 2795 gen_rtvec (1, XVECEXP (base, 0, 0)), 2796 UNSPEC_LTREL_OFFSET); 2797 else 2798 return false; 2799 2800 base = XVECEXP (base, 0, 1); 2801 break; 2802 2803 case UNSPEC_LTREL_BASE: 2804 if (XVECLEN (base, 0) == 1) 2805 base = fake_pool_base, literal_pool = true; 2806 else 2807 base = XVECEXP (base, 0, 1); 2808 break; 2809 2810 default: 2811 return false; 2812 } 2813 2814 if (!REG_P (base) 2815 || (GET_MODE (base) != SImode 2816 && GET_MODE (base) != Pmode)) 2817 return false; 2818 2819 if (REGNO (base) == STACK_POINTER_REGNUM 2820 || REGNO (base) == FRAME_POINTER_REGNUM 2821 || ((reload_completed || reload_in_progress) 2822 && frame_pointer_needed 2823 && REGNO (base) == HARD_FRAME_POINTER_REGNUM) 2824 || REGNO (base) == ARG_POINTER_REGNUM 2825 || (flag_pic 2826 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM)) 2827 pointer = base_ptr = true; 2828 2829 if ((reload_completed || reload_in_progress) 2830 && base == cfun->machine->base_reg) 2831 pointer = base_ptr = literal_pool = true; 2832 } 2833 2834 /* Validate index register. 
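This mirrors the base register handling above: literal pool UNSPEC wrappers are unwrapped first, and the well-known pointer registers mark the whole address as a pointer.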
*/ 2835 if (indx) 2836 { 2837 if (GET_CODE (indx) == UNSPEC) 2838 switch (XINT (indx, 1)) 2839 { 2840 case UNSPEC_LTREF: 2841 if (!disp) 2842 disp = gen_rtx_UNSPEC (Pmode, 2843 gen_rtvec (1, XVECEXP (indx, 0, 0)), 2844 UNSPEC_LTREL_OFFSET); 2845 else 2846 return false; 2847 2848 indx = XVECEXP (indx, 0, 1); 2849 break; 2850 2851 case UNSPEC_LTREL_BASE: 2852 if (XVECLEN (indx, 0) == 1) 2853 indx = fake_pool_base, literal_pool = true; 2854 else 2855 indx = XVECEXP (indx, 0, 1); 2856 break; 2857 2858 default: 2859 return false; 2860 } 2861 2862 if (!REG_P (indx) 2863 || (GET_MODE (indx) != SImode 2864 && GET_MODE (indx) != Pmode)) 2865 return false; 2866 2867 if (REGNO (indx) == STACK_POINTER_REGNUM 2868 || REGNO (indx) == FRAME_POINTER_REGNUM 2869 || ((reload_completed || reload_in_progress) 2870 && frame_pointer_needed 2871 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM) 2872 || REGNO (indx) == ARG_POINTER_REGNUM 2873 || (flag_pic 2874 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM)) 2875 pointer = indx_ptr = true; 2876 2877 if ((reload_completed || reload_in_progress) 2878 && indx == cfun->machine->base_reg) 2879 pointer = indx_ptr = literal_pool = true; 2880 } 2881 2882 /* Prefer to use pointer as base, not index. */ 2883 if (base && indx && !base_ptr 2884 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx)))) 2885 { 2886 rtx tmp = base; 2887 base = indx; 2888 indx = tmp; 2889 } 2890 2891 /* Validate displacement. */ 2892 if (!disp) 2893 { 2894 /* If virtual registers are involved, the displacement will change later 2895 anyway as the virtual registers get eliminated. This could make a 2896 valid displacement invalid, but it is more likely to make an invalid 2897 displacement valid, because we sometimes access the register save area 2898 via negative offsets to one of those registers. 2899 Thus we don't check the displacement for validity here. If after 2900 elimination the displacement turns out to be invalid after all, 2901 this is fixed up by reload in any case. */ 2902 /* LRA always keeps displacements up to date, and we need to 2903 know the displacement is correct during all of LRA, not only 2904 at the final elimination. */ 2905 if (lra_in_progress 2906 || (base != arg_pointer_rtx 2907 && indx != arg_pointer_rtx 2908 && base != return_address_pointer_rtx 2909 && indx != return_address_pointer_rtx 2910 && base != frame_pointer_rtx 2911 && indx != frame_pointer_rtx 2912 && base != virtual_stack_vars_rtx 2913 && indx != virtual_stack_vars_rtx)) 2914 if (!DISP_IN_RANGE (offset)) 2915 return false; 2916 } 2917 else 2918 { 2919 /* All the special cases are pointers. */ 2920 pointer = true; 2921 2922 /* In the small-PIC case, the linker converts @GOT 2923 and @GOTNTPOFF offsets to possible displacements. */ 2924 if (GET_CODE (disp) == UNSPEC 2925 && (XINT (disp, 1) == UNSPEC_GOT 2926 || XINT (disp, 1) == UNSPEC_GOTNTPOFF) 2927 && flag_pic == 1) 2928 { 2929 ; 2930 } 2931 2932 /* Accept pool label offsets. */ 2933 else if (GET_CODE (disp) == UNSPEC 2934 && XINT (disp, 1) == UNSPEC_POOL_OFFSET) 2935 ; 2936 2937 /* Accept literal pool references. */ 2938 else if (GET_CODE (disp) == UNSPEC 2939 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET) 2940 { 2941 /* In case CSE pulled a non-literal-pool reference out of 2942 the pool we have to reject the address. This is 2943 especially important when loading the GOT pointer on 2944 non-zarch CPUs. In this case the literal pool contains an 2945 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which 2946 will most likely exceed the displacement range. */ 2947 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 2948 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0))) 2949 return false; 2950 2951 orig_disp = gen_rtx_CONST (Pmode, disp); 2952 if (offset) 2953 { 2954 /* If we have an offset, make sure it does not 2955 exceed the size of the constant pool entry. */ 2956 rtx sym = XVECEXP (disp, 0, 0); 2957 if (offset >= GET_MODE_SIZE (get_pool_mode (sym))) 2958 return false; 2959 2960 orig_disp = plus_constant (Pmode, orig_disp, offset); 2961 } 2962 } 2963 2964 else 2965 return false; 2966 } 2967 2968 if (!base && !indx) 2969 pointer = true; 2970 2971 if (out) 2972 { 2973 out->base = base; 2974 out->indx = indx; 2975 out->disp = orig_disp; 2976 out->pointer = pointer; 2977 out->literal_pool = literal_pool; 2978 } 2979 2980 return true; 2981 } 2982 2983 /* Decompose an RTL expression OP for a shift count into its components, 2984 and return the base register in BASE and the offset in OFFSET. 2985 2986 Return true if OP is a valid shift count, false if not. */ 2987 2988 bool 2989 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset) 2990 { 2991 HOST_WIDE_INT off = 0; 2992 2993 /* We can have an integer constant, an address register, 2994 or a sum of the two. */ 2995 if (GET_CODE (op) == CONST_INT) 2996 { 2997 off = INTVAL (op); 2998 op = NULL_RTX; 2999 } 3000 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT) 3001 { 3002 off = INTVAL (XEXP (op, 1)); 3003 op = XEXP (op, 0); 3004 } 3005 while (op && GET_CODE (op) == SUBREG) 3006 op = SUBREG_REG (op); 3007 3008 if (op && GET_CODE (op) != REG) 3009 return false; 3010 3011 if (offset) 3012 *offset = off; 3013 if (base) 3014 *base = op; 3015 3016 return true; 3017 } 3018 3019 3020 /* Return true if OP is a valid address without index. */ 3021 3022 bool 3023 s390_legitimate_address_without_index_p (rtx op) 3024 { 3025 struct s390_address addr; 3026 3027 if (!s390_decompose_address (XEXP (op, 0), &addr)) 3028 return false; 3029 if (addr.indx) 3030 return false; 3031 3032 return true; 3033 } 3034 3035 3036 /* Return TRUE if ADDR is an operand valid for a load/store relative 3037 instruction. Be aware that the alignment of the operand needs to 3038 be checked separately. 3039 Valid addresses are single references or a sum of a reference and a 3040 constant integer. Return these parts in SYMREF and ADDEND. You can 3041 pass NULL in SYMREF and/or ADDEND if you are not interested in these 3042 values. Literal pool references are *not* considered symbol 3043 references. */ 3044 3045 static bool 3046 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend) 3047 { 3048 HOST_WIDE_INT tmpaddend = 0; 3049 3050 if (GET_CODE (addr) == CONST) 3051 addr = XEXP (addr, 0); 3052 3053 if (GET_CODE (addr) == PLUS) 3054 { 3055 if (!CONST_INT_P (XEXP (addr, 1))) 3056 return false; 3057 3058 tmpaddend = INTVAL (XEXP (addr, 1)); 3059 addr = XEXP (addr, 0); 3060 } 3061 3062 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr)) 3063 || (GET_CODE (addr) == UNSPEC 3064 && (XINT (addr, 1) == UNSPEC_GOTENT 3065 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT)))) 3066 { 3067 if (symref) 3068 *symref = addr; 3069 if (addend) 3070 *addend = tmpaddend; 3071 3072 return true; 3073 } 3074 return false; 3075 } 3076 3077 /* Return true if the address in OP is valid for constraint letter C 3078 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal 3079 pool MEMs should be accepted. Only the Q, R, S, T constraint 3080 letters are allowed for C.
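As a quick reference for the cases below: Q = no index, short displacement; R = index allowed, short displacement; S = no index, long displacement; T = index allowed, long displacement.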
*/ 3081 3082static int 3083s390_check_qrst_address (char c, rtx op, bool lit_pool_ok) 3084{ 3085 struct s390_address addr; 3086 bool decomposed = false; 3087 3088 /* This check makes sure that no symbolic address (except literal 3089 pool references) are accepted by the R or T constraints. */ 3090 if (s390_loadrelative_operand_p (op, NULL, NULL)) 3091 return 0; 3092 3093 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */ 3094 if (!lit_pool_ok) 3095 { 3096 if (!s390_decompose_address (op, &addr)) 3097 return 0; 3098 if (addr.literal_pool) 3099 return 0; 3100 decomposed = true; 3101 } 3102 3103 switch (c) 3104 { 3105 case 'Q': /* no index short displacement */ 3106 if (!decomposed && !s390_decompose_address (op, &addr)) 3107 return 0; 3108 if (addr.indx) 3109 return 0; 3110 if (!s390_short_displacement (addr.disp)) 3111 return 0; 3112 break; 3113 3114 case 'R': /* with index short displacement */ 3115 if (TARGET_LONG_DISPLACEMENT) 3116 { 3117 if (!decomposed && !s390_decompose_address (op, &addr)) 3118 return 0; 3119 if (!s390_short_displacement (addr.disp)) 3120 return 0; 3121 } 3122 /* Any invalid address here will be fixed up by reload, 3123 so accept it for the most generic constraint. */ 3124 break; 3125 3126 case 'S': /* no index long displacement */ 3127 if (!TARGET_LONG_DISPLACEMENT) 3128 return 0; 3129 if (!decomposed && !s390_decompose_address (op, &addr)) 3130 return 0; 3131 if (addr.indx) 3132 return 0; 3133 if (s390_short_displacement (addr.disp)) 3134 return 0; 3135 break; 3136 3137 case 'T': /* with index long displacement */ 3138 if (!TARGET_LONG_DISPLACEMENT) 3139 return 0; 3140 /* Any invalid address here will be fixed up by reload, 3141 so accept it for the most generic constraint. */ 3142 if ((decomposed || s390_decompose_address (op, &addr)) 3143 && s390_short_displacement (addr.disp)) 3144 return 0; 3145 break; 3146 default: 3147 return 0; 3148 } 3149 return 1; 3150} 3151 3152 3153/* Evaluates constraint strings described by the regular expression 3154 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for 3155 the constraint given in STR, or 0 else. */ 3156 3157int 3158s390_mem_constraint (const char *str, rtx op) 3159{ 3160 char c = str[0]; 3161 3162 switch (c) 3163 { 3164 case 'A': 3165 /* Check for offsettable variants of memory constraints. */ 3166 if (!MEM_P (op) || MEM_VOLATILE_P (op)) 3167 return 0; 3168 if ((reload_completed || reload_in_progress) 3169 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op)) 3170 return 0; 3171 return s390_check_qrst_address (str[1], XEXP (op, 0), true); 3172 case 'B': 3173 /* Check for non-literal-pool variants of memory constraints. */ 3174 if (!MEM_P (op)) 3175 return 0; 3176 return s390_check_qrst_address (str[1], XEXP (op, 0), false); 3177 case 'Q': 3178 case 'R': 3179 case 'S': 3180 case 'T': 3181 if (GET_CODE (op) != MEM) 3182 return 0; 3183 return s390_check_qrst_address (c, XEXP (op, 0), true); 3184 case 'U': 3185 return (s390_check_qrst_address ('Q', op, true) 3186 || s390_check_qrst_address ('R', op, true)); 3187 case 'W': 3188 return (s390_check_qrst_address ('S', op, true) 3189 || s390_check_qrst_address ('T', op, true)); 3190 case 'Y': 3191 /* Simply check for the basic form of a shift count. Reload will 3192 take care of making sure we have a proper base register. 
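(Typical operands here are a plain (reg:SI 42) or (plus:SI (reg:SI 42) (const_int 7)) -- the register numbers are hypothetical.)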
*/ 3193 if (!s390_decompose_shift_count (op, NULL, NULL)) 3194 return 0; 3195 break; 3196 case 'Z': 3197 return s390_check_qrst_address (str[1], op, true); 3198 default: 3199 return 0; 3200 } 3201 return 1; 3202} 3203 3204 3205/* Evaluates constraint strings starting with letter O. Input 3206 parameter C is the second letter following the "O" in the constraint 3207 string. Returns 1 if VALUE meets the respective constraint and 0 3208 otherwise. */ 3209 3210int 3211s390_O_constraint_str (const char c, HOST_WIDE_INT value) 3212{ 3213 if (!TARGET_EXTIMM) 3214 return 0; 3215 3216 switch (c) 3217 { 3218 case 's': 3219 return trunc_int_for_mode (value, SImode) == value; 3220 3221 case 'p': 3222 return value == 0 3223 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1; 3224 3225 case 'n': 3226 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1; 3227 3228 default: 3229 gcc_unreachable (); 3230 } 3231} 3232 3233 3234/* Evaluates constraint strings starting with letter N. Parameter STR 3235 contains the letters following letter "N" in the constraint string. 3236 Returns true if VALUE matches the constraint. */ 3237 3238int 3239s390_N_constraint_str (const char *str, HOST_WIDE_INT value) 3240{ 3241 machine_mode mode, part_mode; 3242 int def; 3243 int part, part_goal; 3244 3245 3246 if (str[0] == 'x') 3247 part_goal = -1; 3248 else 3249 part_goal = str[0] - '0'; 3250 3251 switch (str[1]) 3252 { 3253 case 'Q': 3254 part_mode = QImode; 3255 break; 3256 case 'H': 3257 part_mode = HImode; 3258 break; 3259 case 'S': 3260 part_mode = SImode; 3261 break; 3262 default: 3263 return 0; 3264 } 3265 3266 switch (str[2]) 3267 { 3268 case 'H': 3269 mode = HImode; 3270 break; 3271 case 'S': 3272 mode = SImode; 3273 break; 3274 case 'D': 3275 mode = DImode; 3276 break; 3277 default: 3278 return 0; 3279 } 3280 3281 switch (str[3]) 3282 { 3283 case '0': 3284 def = 0; 3285 break; 3286 case 'F': 3287 def = -1; 3288 break; 3289 default: 3290 return 0; 3291 } 3292 3293 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode)) 3294 return 0; 3295 3296 part = s390_single_part (GEN_INT (value), mode, part_mode, def); 3297 if (part < 0) 3298 return 0; 3299 if (part_goal != -1 && part_goal != part) 3300 return 0; 3301 3302 return 1; 3303} 3304 3305 3306/* Returns true if the input parameter VALUE is a float zero. */ 3307 3308int 3309s390_float_const_zero_p (rtx value) 3310{ 3311 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT 3312 && value == CONST0_RTX (GET_MODE (value))); 3313} 3314 3315/* Implement TARGET_REGISTER_MOVE_COST. */ 3316 3317static int 3318s390_register_move_cost (machine_mode mode, 3319 reg_class_t from, reg_class_t to) 3320{ 3321 /* On s390, copy between fprs and gprs is expensive. */ 3322 3323 /* It becomes somewhat faster having ldgr/lgdr. */ 3324 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8) 3325 { 3326 /* ldgr is single cycle. */ 3327 if (reg_classes_intersect_p (from, GENERAL_REGS) 3328 && reg_classes_intersect_p (to, FP_REGS)) 3329 return 1; 3330 /* lgdr needs 3 cycles. */ 3331 if (reg_classes_intersect_p (to, GENERAL_REGS) 3332 && reg_classes_intersect_p (from, FP_REGS)) 3333 return 3; 3334 } 3335 3336 /* Otherwise copying is done via memory. */ 3337 if ((reg_classes_intersect_p (from, GENERAL_REGS) 3338 && reg_classes_intersect_p (to, FP_REGS)) 3339 || (reg_classes_intersect_p (from, FP_REGS) 3340 && reg_classes_intersect_p (to, GENERAL_REGS))) 3341 return 10; 3342 3343 return 1; 3344} 3345 3346/* Implement TARGET_MEMORY_MOVE_COST. 
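A flat cost of 2 is returned below, independent of MODE, RCLASS and direction.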
*/ 3347 3348static int 3349s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 3350 reg_class_t rclass ATTRIBUTE_UNUSED, 3351 bool in ATTRIBUTE_UNUSED) 3352{ 3353 return 2; 3354} 3355 3356/* Compute a (partial) cost for rtx X. Return true if the complete 3357 cost has been computed, and false if subexpressions should be 3358 scanned. In either case, *TOTAL contains the cost result. 3359 CODE contains GET_CODE (x), OUTER_CODE contains the code 3360 of the superexpression of x. */ 3361 3362static bool 3363s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, 3364 int *total, bool speed ATTRIBUTE_UNUSED) 3365{ 3366 switch (code) 3367 { 3368 case CONST: 3369 case CONST_INT: 3370 case LABEL_REF: 3371 case SYMBOL_REF: 3372 case CONST_DOUBLE: 3373 case MEM: 3374 *total = 0; 3375 return true; 3376 3377 case ASHIFT: 3378 case ASHIFTRT: 3379 case LSHIFTRT: 3380 case ROTATE: 3381 case ROTATERT: 3382 case AND: 3383 case IOR: 3384 case XOR: 3385 case NEG: 3386 case NOT: 3387 *total = COSTS_N_INSNS (1); 3388 return false; 3389 3390 case PLUS: 3391 case MINUS: 3392 *total = COSTS_N_INSNS (1); 3393 return false; 3394 3395 case MULT: 3396 switch (GET_MODE (x)) 3397 { 3398 case SImode: 3399 { 3400 rtx left = XEXP (x, 0); 3401 rtx right = XEXP (x, 1); 3402 if (GET_CODE (right) == CONST_INT 3403 && CONST_OK_FOR_K (INTVAL (right))) 3404 *total = s390_cost->mhi; 3405 else if (GET_CODE (left) == SIGN_EXTEND) 3406 *total = s390_cost->mh; 3407 else 3408 *total = s390_cost->ms; /* msr, ms, msy */ 3409 break; 3410 } 3411 case DImode: 3412 { 3413 rtx left = XEXP (x, 0); 3414 rtx right = XEXP (x, 1); 3415 if (TARGET_ZARCH) 3416 { 3417 if (GET_CODE (right) == CONST_INT 3418 && CONST_OK_FOR_K (INTVAL (right))) 3419 *total = s390_cost->mghi; 3420 else if (GET_CODE (left) == SIGN_EXTEND) 3421 *total = s390_cost->msgf; 3422 else 3423 *total = s390_cost->msg; /* msgr, msg */ 3424 } 3425 else /* TARGET_31BIT */ 3426 { 3427 if (GET_CODE (left) == SIGN_EXTEND 3428 && GET_CODE (right) == SIGN_EXTEND) 3429 /* mulsidi case: mr, m */ 3430 *total = s390_cost->m; 3431 else if (GET_CODE (left) == ZERO_EXTEND 3432 && GET_CODE (right) == ZERO_EXTEND 3433 && TARGET_CPU_ZARCH) 3434 /* umulsidi case: ml, mlr */ 3435 *total = s390_cost->ml; 3436 else 3437 /* Complex calculation is required. */ 3438 *total = COSTS_N_INSNS (40); 3439 } 3440 break; 3441 } 3442 case SFmode: 3443 case DFmode: 3444 *total = s390_cost->mult_df; 3445 break; 3446 case TFmode: 3447 *total = s390_cost->mxbr; 3448 break; 3449 default: 3450 return false; 3451 } 3452 return false; 3453 3454 case FMA: 3455 switch (GET_MODE (x)) 3456 { 3457 case DFmode: 3458 *total = s390_cost->madbr; 3459 break; 3460 case SFmode: 3461 *total = s390_cost->maebr; 3462 break; 3463 default: 3464 return false; 3465 } 3466 /* Negate in the third argument is free: FMSUB. 
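That is, (fma:DF a b (neg:DF c)) maps to the fused multiply-subtract instruction, so below only the operand costs are added on top of the multiply-and-add cost already chosen above.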
*/ 3467 if (GET_CODE (XEXP (x, 2)) == NEG) 3468 { 3469 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed) 3470 + rtx_cost (XEXP (x, 1), FMA, 1, speed) 3471 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed)); 3472 return true; 3473 } 3474 return false; 3475 3476 case UDIV: 3477 case UMOD: 3478 if (GET_MODE (x) == TImode) /* 128 bit division */ 3479 *total = s390_cost->dlgr; 3480 else if (GET_MODE (x) == DImode) 3481 { 3482 rtx right = XEXP (x, 1); 3483 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */ 3484 *total = s390_cost->dlr; 3485 else /* 64 by 64 bit division */ 3486 *total = s390_cost->dlgr; 3487 } 3488 else if (GET_MODE (x) == SImode) /* 32 bit division */ 3489 *total = s390_cost->dlr; 3490 return false; 3491 3492 case DIV: 3493 case MOD: 3494 if (GET_MODE (x) == DImode) 3495 { 3496 rtx right = XEXP (x, 1); 3497 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */ 3498 if (TARGET_ZARCH) 3499 *total = s390_cost->dsgfr; 3500 else 3501 *total = s390_cost->dr; 3502 else /* 64 by 64 bit division */ 3503 *total = s390_cost->dsgr; 3504 } 3505 else if (GET_MODE (x) == SImode) /* 32 bit division */ 3506 *total = s390_cost->dlr; 3507 else if (GET_MODE (x) == SFmode) 3508 { 3509 *total = s390_cost->debr; 3510 } 3511 else if (GET_MODE (x) == DFmode) 3512 { 3513 *total = s390_cost->ddbr; 3514 } 3515 else if (GET_MODE (x) == TFmode) 3516 { 3517 *total = s390_cost->dxbr; 3518 } 3519 return false; 3520 3521 case SQRT: 3522 if (GET_MODE (x) == SFmode) 3523 *total = s390_cost->sqebr; 3524 else if (GET_MODE (x) == DFmode) 3525 *total = s390_cost->sqdbr; 3526 else /* TFmode */ 3527 *total = s390_cost->sqxbr; 3528 return false; 3529 3530 case SIGN_EXTEND: 3531 case ZERO_EXTEND: 3532 if (outer_code == MULT || outer_code == DIV || outer_code == MOD 3533 || outer_code == PLUS || outer_code == MINUS 3534 || outer_code == COMPARE) 3535 *total = 0; 3536 return false; 3537 3538 case COMPARE: 3539 *total = COSTS_N_INSNS (1); 3540 if (GET_CODE (XEXP (x, 0)) == AND 3541 && GET_CODE (XEXP (x, 1)) == CONST_INT 3542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 3543 { 3544 rtx op0 = XEXP (XEXP (x, 0), 0); 3545 rtx op1 = XEXP (XEXP (x, 0), 1); 3546 rtx op2 = XEXP (x, 1); 3547 3548 if (memory_operand (op0, GET_MODE (op0)) 3549 && s390_tm_ccmode (op1, op2, 0) != VOIDmode) 3550 return true; 3551 if (register_operand (op0, GET_MODE (op0)) 3552 && s390_tm_ccmode (op1, op2, 1) != VOIDmode) 3553 return true; 3554 } 3555 return false; 3556 3557 default: 3558 return false; 3559 } 3560} 3561 3562/* Return the cost of an address rtx ADDR. */ 3563 3564static int 3565s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED, 3566 addr_space_t as ATTRIBUTE_UNUSED, 3567 bool speed ATTRIBUTE_UNUSED) 3568{ 3569 struct s390_address ad; 3570 if (!s390_decompose_address (addr, &ad)) 3571 return 1000; 3572 3573 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1); 3574} 3575 3576/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode, 3577 otherwise return 0. */ 3578 3579int 3580tls_symbolic_operand (rtx op) 3581{ 3582 if (GET_CODE (op) != SYMBOL_REF) 3583 return 0; 3584 return SYMBOL_REF_TLS_MODEL (op); 3585} 3586 3587/* Split DImode access register reference REG (on 64-bit) into its constituent 3588 low and high parts, and store them into LO and HI. Note that gen_lowpart/ 3589 gen_highpart cannot be used as they assume all registers are word-sized, 3590 while our access registers have only half that size. 
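(Example: for a DImode reference to the access register pair a0/a1, *HI is set to the SImode a0 and *LO to the SImode a1.)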
*/ 3591 3592void 3593s390_split_access_reg (rtx reg, rtx *lo, rtx *hi) 3594{ 3595 gcc_assert (TARGET_64BIT); 3596 gcc_assert (ACCESS_REG_P (reg)); 3597 gcc_assert (GET_MODE (reg) == DImode); 3598 gcc_assert (!(REGNO (reg) & 1)); 3599 3600 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1); 3601 *hi = gen_rtx_REG (SImode, REGNO (reg)); 3602} 3603 3604/* Return true if OP contains a symbol reference */ 3605 3606bool 3607symbolic_reference_mentioned_p (rtx op) 3608{ 3609 const char *fmt; 3610 int i; 3611 3612 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3613 return 1; 3614 3615 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3616 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3617 { 3618 if (fmt[i] == 'E') 3619 { 3620 int j; 3621 3622 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3623 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3624 return 1; 3625 } 3626 3627 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3628 return 1; 3629 } 3630 3631 return 0; 3632} 3633 3634/* Return true if OP contains a reference to a thread-local symbol. */ 3635 3636bool 3637tls_symbolic_reference_mentioned_p (rtx op) 3638{ 3639 const char *fmt; 3640 int i; 3641 3642 if (GET_CODE (op) == SYMBOL_REF) 3643 return tls_symbolic_operand (op); 3644 3645 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3646 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3647 { 3648 if (fmt[i] == 'E') 3649 { 3650 int j; 3651 3652 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3653 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3654 return true; 3655 } 3656 3657 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i))) 3658 return true; 3659 } 3660 3661 return false; 3662} 3663 3664 3665/* Return true if OP is a legitimate general operand when 3666 generating PIC code. It is given that flag_pic is on 3667 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */ 3668 3669int 3670legitimate_pic_operand_p (rtx op) 3671{ 3672 /* Accept all non-symbolic constants. */ 3673 if (!SYMBOLIC_CONST (op)) 3674 return 1; 3675 3676 /* Reject everything else; must be handled 3677 via emit_symbolic_move. */ 3678 return 0; 3679} 3680 3681/* Returns true if the constant value OP is a legitimate general operand. 3682 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */ 3683 3684static bool 3685s390_legitimate_constant_p (machine_mode mode, rtx op) 3686{ 3687 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) 3688 { 3689 if (GET_MODE_SIZE (mode) != 16) 3690 return 0; 3691 3692 if (!satisfies_constraint_j00 (op) 3693 && !satisfies_constraint_jm1 (op) 3694 && !satisfies_constraint_jKK (op) 3695 && !satisfies_constraint_jxx (op) 3696 && !satisfies_constraint_jyy (op)) 3697 return 0; 3698 } 3699 3700 /* Accept all non-symbolic constants. */ 3701 if (!SYMBOLIC_CONST (op)) 3702 return 1; 3703 3704 /* Accept immediate LARL operands. */ 3705 if (TARGET_CPU_ZARCH && larl_operand (op, mode)) 3706 return 1; 3707 3708 /* Thread-local symbols are never legal constants. This is 3709 so that emit_call knows that computing such addresses 3710 might require a function call. */ 3711 if (TLS_SYMBOLIC_CONST (op)) 3712 return 0; 3713 3714 /* In the PIC case, symbolic constants must *not* be 3715 forced into the literal pool. We accept them here, 3716 so that they will be handled by emit_symbolic_move. */ 3717 if (flag_pic) 3718 return 1; 3719 3720 /* All remaining non-PIC symbolic constants are 3721 forced into the literal pool. 
*/ 3722 return 0; 3723 } 3724 3725 /* Determine if it's legal to put X into the constant pool. This 3726 is not possible if X contains the address of a symbol that is 3727 not constant (TLS) or not known at final link time (PIC). */ 3728 3729 static bool 3730 s390_cannot_force_const_mem (machine_mode mode, rtx x) 3731 { 3732 switch (GET_CODE (x)) 3733 { 3734 case CONST_INT: 3735 case CONST_DOUBLE: 3736 case CONST_VECTOR: 3737 /* Accept all non-symbolic constants. */ 3738 return false; 3739 3740 case LABEL_REF: 3741 /* Labels are OK iff we are non-PIC. */ 3742 return flag_pic != 0; 3743 3744 case SYMBOL_REF: 3745 /* 'Naked' TLS symbol references are never OK, 3746 non-TLS symbols are OK iff we are non-PIC. */ 3747 if (tls_symbolic_operand (x)) 3748 return true; 3749 else 3750 return flag_pic != 0; 3751 3752 case CONST: 3753 return s390_cannot_force_const_mem (mode, XEXP (x, 0)); 3754 case PLUS: 3755 case MINUS: 3756 return s390_cannot_force_const_mem (mode, XEXP (x, 0)) 3757 || s390_cannot_force_const_mem (mode, XEXP (x, 1)); 3758 3759 case UNSPEC: 3760 switch (XINT (x, 1)) 3761 { 3762 /* Only lt-relative or GOT-relative UNSPECs are OK. */ 3763 case UNSPEC_LTREL_OFFSET: 3764 case UNSPEC_GOT: 3765 case UNSPEC_GOTOFF: 3766 case UNSPEC_PLTOFF: 3767 case UNSPEC_TLSGD: 3768 case UNSPEC_TLSLDM: 3769 case UNSPEC_NTPOFF: 3770 case UNSPEC_DTPOFF: 3771 case UNSPEC_GOTNTPOFF: 3772 case UNSPEC_INDNTPOFF: 3773 return false; 3774 3775 /* If the literal pool shares the code section, execute 3776 template placeholders may be put into the pool as well. */ 3777 case UNSPEC_INSN: 3778 return TARGET_CPU_ZARCH; 3779 3780 default: 3781 return true; 3782 } 3783 break; 3784 3785 default: 3786 gcc_unreachable (); 3787 } 3788 } 3789 3790 /* Returns true if the constant value OP is a legitimate general 3791 operand during and after reload. The difference from 3792 legitimate_constant_p is that this function will not accept 3793 a constant that would need to be forced to the literal pool 3794 before it can be used as operand. 3795 This function accepts all constants which can be loaded directly 3796 into a GPR. */ 3797 3798 bool 3799 legitimate_reload_constant_p (rtx op) 3800 { 3801 /* Accept la(y) operands. */ 3802 if (GET_CODE (op) == CONST_INT 3803 && DISP_IN_RANGE (INTVAL (op))) 3804 return true; 3805 3806 /* Accept l(g)hi/l(g)fi operands. */ 3807 if (GET_CODE (op) == CONST_INT 3808 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op)))) 3809 return true; 3810 3811 /* Accept lliXX operands. */ 3812 if (TARGET_ZARCH 3813 && GET_CODE (op) == CONST_INT 3814 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op) 3815 && s390_single_part (op, word_mode, HImode, 0) >= 0) 3816 return true; 3817 3818 if (TARGET_EXTIMM 3819 && GET_CODE (op) == CONST_INT 3820 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op) 3821 && s390_single_part (op, word_mode, SImode, 0) >= 0) 3822 return true; 3823 3824 /* Accept larl operands. */ 3825 if (TARGET_CPU_ZARCH 3826 && larl_operand (op, VOIDmode)) 3827 return true; 3828 3829 /* Accept floating-point zero operands that fit into a single GPR. */ 3830 if (GET_CODE (op) == CONST_DOUBLE 3831 && s390_float_const_zero_p (op) 3832 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD) 3833 return true; 3834 3835 /* Accept double-word operands that can be split. */ 3836 if (GET_CODE (op) == CONST_INT 3837 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)) 3838 { 3839 machine_mode dword_mode = word_mode == SImode ? DImode : TImode; 3840 rtx hi = operand_subword (op, 0, 0, dword_mode); 3841 rtx lo = operand_subword (op, 1, 0, dword_mode); 3842 return legitimate_reload_constant_p (hi) 3843 && legitimate_reload_constant_p (lo); 3844 } 3845 3846 /* Everything else cannot be handled without reload. */ 3847 return false; 3848 } 3849 3850 /* Returns true if the constant value OP is a legitimate fp operand 3851 during and after reload. 3852 This function accepts all constants which can be loaded directly 3853 into an FPR. */ 3854 3855 static bool 3856 legitimate_reload_fp_constant_p (rtx op) 3857 { 3858 /* Accept floating-point zero operands if the load zero instruction 3859 can be used. Prior to z196 the load fp zero instruction caused a 3860 performance penalty if the result is used as BFP number. */ 3861 if (TARGET_Z196 3862 && GET_CODE (op) == CONST_DOUBLE 3863 && s390_float_const_zero_p (op)) 3864 return true; 3865 3866 return false; 3867 } 3868 3869 /* Returns true if the constant value OP is a legitimate vector operand 3870 during and after reload. 3871 This function accepts all constants which can be loaded directly 3872 into a VR. */ 3873 3874 static bool 3875 legitimate_reload_vector_constant_p (rtx op) 3876 { 3877 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16 3878 && (satisfies_constraint_j00 (op) 3879 || satisfies_constraint_jm1 (op) 3880 || satisfies_constraint_jKK (op) 3881 || satisfies_constraint_jxx (op) 3882 || satisfies_constraint_jyy (op))) 3883 return true; 3884 3885 return false; 3886 } 3887 3888 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS, 3889 return the class of reg to actually use. */ 3890 3891 static reg_class_t 3892 s390_preferred_reload_class (rtx op, reg_class_t rclass) 3893 { 3894 switch (GET_CODE (op)) 3895 { 3896 /* Constants we cannot reload into general registers 3897 must be forced into the literal pool. */ 3898 case CONST_VECTOR: 3899 case CONST_DOUBLE: 3900 case CONST_INT: 3901 if (reg_class_subset_p (GENERAL_REGS, rclass) 3902 && legitimate_reload_constant_p (op)) 3903 return GENERAL_REGS; 3904 else if (reg_class_subset_p (ADDR_REGS, rclass) 3905 && legitimate_reload_constant_p (op)) 3906 return ADDR_REGS; 3907 else if (reg_class_subset_p (FP_REGS, rclass) 3908 && legitimate_reload_fp_constant_p (op)) 3909 return FP_REGS; 3910 else if (reg_class_subset_p (VEC_REGS, rclass) 3911 && legitimate_reload_vector_constant_p (op)) 3912 return VEC_REGS; 3913 3914 return NO_REGS; 3915 3916 /* If a symbolic constant or a PLUS is reloaded, 3917 it is most likely being used as an address, so 3918 prefer ADDR_REGS. If RCLASS is not a superset 3919 of ADDR_REGS, e.g. FP_REGS, reject this reload. */ 3920 case CONST: 3921 /* Symrefs cannot be pushed into the literal pool with -fPIC 3922 so we *MUST NOT* return NO_REGS for these cases 3923 (s390_cannot_force_const_mem will return true). 3924 3925 On the other hand we MUST return NO_REGS for symrefs with 3926 invalid addend which might have been pushed to the literal 3927 pool (no -fPIC). Usually we would expect them to be 3928 handled via secondary reload but this does not happen if 3929 they are used as literal pool slot replacement in reload 3930 inheritance (see emit_input_reload_insns). */ 3931 if (TARGET_CPU_ZARCH 3932 && GET_CODE (XEXP (op, 0)) == PLUS 3933 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF 3934 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT) 3935 { 3936 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass)) 3937 return ADDR_REGS; 3938 else 3939 return NO_REGS; 3940 } 3941 /* fallthrough */ 3942 case LABEL_REF: 3943 case SYMBOL_REF: 3944 if (!legitimate_reload_constant_p (op)) 3945 return NO_REGS; 3946 /* fallthrough */ 3947 case PLUS: 3948 /* load address will be used. */ 3949 if (reg_class_subset_p (ADDR_REGS, rclass)) 3950 return ADDR_REGS; 3951 else 3952 return NO_REGS; 3953 3954 default: 3955 break; 3956 } 3957 3958 return rclass; 3959 } 3960 3961 /* Return true if ADDR is SYMBOL_REF + addend with addend being a 3962 multiple of ALIGNMENT and the SYMBOL_REF being naturally 3963 aligned. */ 3964 3965 bool 3966 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment) 3967 { 3968 HOST_WIDE_INT addend; 3969 rtx symref; 3970 3971 if (!s390_loadrelative_operand_p (addr, &symref, &addend)) 3972 return false; 3973 3974 if (addend & (alignment - 1)) 3975 return false; 3976 3977 if (GET_CODE (symref) == SYMBOL_REF 3978 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)) 3979 return true; 3980 3981 if (GET_CODE (symref) == UNSPEC 3982 && alignment <= UNITS_PER_LONG) 3983 return true; 3984 3985 return false; 3986 } 3987 3988 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl 3989 operand, SCRATCH is used to load the even part of the address, 3990 which is then incremented by one. */ 3991 3992 void 3993 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch) 3994 { 3995 HOST_WIDE_INT addend; 3996 rtx symref; 3997 3998 if (!s390_loadrelative_operand_p (addr, &symref, &addend)) 3999 gcc_unreachable (); 4000 4001 if (!(addend & 1)) 4002 /* Easy case. The addend is even so larl will do fine. */ 4003 emit_move_insn (reg, addr); 4004 else 4005 { 4006 /* We can leave the scratch register untouched if the target 4007 register is a valid base register. */ 4008 if (REGNO (reg) < FIRST_PSEUDO_REGISTER 4009 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS) 4010 scratch = reg; 4011 4012 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER); 4013 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS); 4014 4015 if (addend != 1) 4016 emit_move_insn (scratch, 4017 gen_rtx_CONST (Pmode, 4018 gen_rtx_PLUS (Pmode, symref, 4019 GEN_INT (addend - 1)))); 4020 else 4021 emit_move_insn (scratch, symref); 4022 4023 /* Increment the address using la in order to avoid clobbering cc. */ 4024 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx)); 4025 } 4026 } 4027 4028 /* Generate what is necessary to move between REG and MEM using 4029 SCRATCH. The direction is given by TOMEM. */ 4030 4031 void 4032 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem) 4033 { 4034 /* Reload might have pulled a constant out of the literal pool. 4035 Force it back in. */ 4036 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE 4037 || GET_CODE (mem) == CONST_VECTOR 4038 || GET_CODE (mem) == CONST) 4039 mem = force_const_mem (GET_MODE (reg), mem); 4040 4041 gcc_assert (MEM_P (mem)); 4042 4043 /* For a load from memory we can leave the scratch register 4044 untouched if the target register is a valid base register. */ 4045 if (!tomem 4046 && REGNO (reg) < FIRST_PSEUDO_REGISTER 4047 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS 4048 && GET_MODE (reg) == GET_MODE (scratch)) 4049 scratch = reg; 4050 4051 /* Load address into scratch register.
Since we can't have a 4052 secondary reload for a secondary reload we have to cover the case 4053 where larl would need a secondary reload here as well. */ 4054 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch); 4055 4056 /* Now we can use a standard load/store to do the move. */ 4057 if (tomem) 4058 emit_move_insn (replace_equiv_address (mem, scratch), reg); 4059 else 4060 emit_move_insn (reg, replace_equiv_address (mem, scratch)); 4061} 4062 4063/* Inform reload about cases where moving X with a mode MODE to a register in 4064 RCLASS requires an extra scratch or immediate register. Return the class 4065 needed for the immediate register. */ 4066 4067static reg_class_t 4068s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 4069 machine_mode mode, secondary_reload_info *sri) 4070{ 4071 enum reg_class rclass = (enum reg_class) rclass_i; 4072 4073 /* Intermediate register needed. */ 4074 if (reg_classes_intersect_p (CC_REGS, rclass)) 4075 return GENERAL_REGS; 4076 4077 if (TARGET_VX) 4078 { 4079 /* The vst/vl vector move instructions allow only for short 4080 displacements. */ 4081 if (MEM_P (x) 4082 && GET_CODE (XEXP (x, 0)) == PLUS 4083 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 4084 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1))) 4085 && reg_class_subset_p (rclass, VEC_REGS) 4086 && (!reg_class_subset_p (rclass, FP_REGS) 4087 || (GET_MODE_SIZE (mode) > 8 4088 && s390_class_max_nregs (FP_REGS, mode) == 1))) 4089 { 4090 if (in_p) 4091 sri->icode = (TARGET_64BIT ? 4092 CODE_FOR_reloaddi_la_in : 4093 CODE_FOR_reloadsi_la_in); 4094 else 4095 sri->icode = (TARGET_64BIT ? 4096 CODE_FOR_reloaddi_la_out : 4097 CODE_FOR_reloadsi_la_out); 4098 } 4099 } 4100 4101 if (TARGET_Z10) 4102 { 4103 HOST_WIDE_INT offset; 4104 rtx symref; 4105 4106 /* On z10 several optimizer steps may generate larl operands with 4107 an odd addend. */ 4108 if (in_p 4109 && s390_loadrelative_operand_p (x, &symref, &offset) 4110 && mode == Pmode 4111 && !SYMBOL_REF_ALIGN1_P (symref) 4112 && (offset & 1) == 1) 4113 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10 4114 : CODE_FOR_reloadsi_larl_odd_addend_z10); 4115 4116 /* Handle all the (mem (symref)) accesses we cannot use the z10 4117 instructions for. */ 4118 if (MEM_P (x) 4119 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL) 4120 && (mode == QImode 4121 || !reg_class_subset_p (rclass, GENERAL_REGS) 4122 || GET_MODE_SIZE (mode) > UNITS_PER_WORD 4123 || !s390_check_symref_alignment (XEXP (x, 0), 4124 GET_MODE_SIZE (mode)))) 4125 { 4126#define __SECONDARY_RELOAD_CASE(M,m) \ 4127 case M##mode: \ 4128 if (TARGET_64BIT) \ 4129 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \ 4130 CODE_FOR_reload##m##di_tomem_z10; \ 4131 else \ 4132 sri->icode = in_p ? 
CODE_FOR_reload##m##si_toreg_z10 :	\
		      CODE_FOR_reload##m##si_tomem_z10;	\
	  break;

	  switch (GET_MODE (x))
	    {
	      __SECONDARY_RELOAD_CASE (QI, qi);
	      __SECONDARY_RELOAD_CASE (HI, hi);
	      __SECONDARY_RELOAD_CASE (SI, si);
	      __SECONDARY_RELOAD_CASE (DI, di);
	      __SECONDARY_RELOAD_CASE (TI, ti);
	      __SECONDARY_RELOAD_CASE (SF, sf);
	      __SECONDARY_RELOAD_CASE (DF, df);
	      __SECONDARY_RELOAD_CASE (TF, tf);
	      __SECONDARY_RELOAD_CASE (SD, sd);
	      __SECONDARY_RELOAD_CASE (DD, dd);
	      __SECONDARY_RELOAD_CASE (TD, td);
	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
	    default:
	      gcc_unreachable ();
	    }
#undef __SECONDARY_RELOAD_CASE
	}
    }

  /* We need a scratch register when loading a PLUS expression which
     is not a legitimate operand of the LOAD ADDRESS instruction.  */
  /* LRA can deal with the transformation of such a PLUS operand by
     itself -- so we don't need to prompt LRA in this case.  */
  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
    sri->icode = (TARGET_64BIT ?
		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);

  /* When performing a multiword move from or to memory, we have to make
     sure the second chunk in memory is addressable without causing a
     displacement overflow.  If that would be the case we calculate the
     address in a scratch register.  */
  if (MEM_P (x)
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
			 + GET_MODE_SIZE (mode) - 1))
    {
      /* For GENERAL_REGS a displacement overflow is no problem if occurring
	 in an s_operand address since we may fall back to lm/stm.  So we
	 only have to care about overflows in the b+i+d case.  */
      if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
	  /* For FP_REGS no lm/stm is available so this check is triggered
	     for displacement overflows in b+i+d and b+d like addresses.  */
	  || (reg_classes_intersect_p (FP_REGS, rclass)
	      && s390_class_max_nregs (FP_REGS, mode) > 1))
	{
	  if (in_p)
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_in :
			  CODE_FOR_reloadsi_la_in);
	  else
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_out :
			  CODE_FOR_reloadsi_la_out);
	}
    }

  /* A scratch address register is needed when a symbolic constant is
     copied to r0 compiling with -fPIC.  In other cases the target
     register might be used as a temporary (see legitimize_pic_address).
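     (A note added for clarity: r0 is not part of ADDR_REGS and thus
     cannot serve as the base register that legitimize_pic_address may
     need, which is why a scratch ADDR_REGS register is requested
     here.)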
*/ 4220 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS) 4221 sri->icode = (TARGET_64BIT ? 4222 CODE_FOR_reloaddi_PIC_addr : 4223 CODE_FOR_reloadsi_PIC_addr); 4224 4225 /* Either scratch or no register needed. */ 4226 return NO_REGS; 4227} 4228 4229/* Generate code to load SRC, which is PLUS that is not a 4230 legitimate operand for the LA instruction, into TARGET. 4231 SCRATCH may be used as scratch register. */ 4232 4233void 4234s390_expand_plus_operand (rtx target, rtx src, 4235 rtx scratch) 4236{ 4237 rtx sum1, sum2; 4238 struct s390_address ad; 4239 4240 /* src must be a PLUS; get its two operands. */ 4241 gcc_assert (GET_CODE (src) == PLUS); 4242 gcc_assert (GET_MODE (src) == Pmode); 4243 4244 /* Check if any of the two operands is already scheduled 4245 for replacement by reload. This can happen e.g. when 4246 float registers occur in an address. */ 4247 sum1 = find_replacement (&XEXP (src, 0)); 4248 sum2 = find_replacement (&XEXP (src, 1)); 4249 src = gen_rtx_PLUS (Pmode, sum1, sum2); 4250 4251 /* If the address is already strictly valid, there's nothing to do. */ 4252 if (!s390_decompose_address (src, &ad) 4253 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 4254 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))) 4255 { 4256 /* Otherwise, one of the operands cannot be an address register; 4257 we reload its value into the scratch register. */ 4258 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15) 4259 { 4260 emit_move_insn (scratch, sum1); 4261 sum1 = scratch; 4262 } 4263 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15) 4264 { 4265 emit_move_insn (scratch, sum2); 4266 sum2 = scratch; 4267 } 4268 4269 /* According to the way these invalid addresses are generated 4270 in reload.c, it should never happen (at least on s390) that 4271 *neither* of the PLUS components, after find_replacements 4272 was applied, is an address register. */ 4273 if (sum1 == scratch && sum2 == scratch) 4274 { 4275 debug_rtx (src); 4276 gcc_unreachable (); 4277 } 4278 4279 src = gen_rtx_PLUS (Pmode, sum1, sum2); 4280 } 4281 4282 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS 4283 is only ever performed on addresses, so we can mark the 4284 sum as legitimate for LA in any case. */ 4285 s390_load_address (target, src); 4286} 4287 4288 4289/* Return true if ADDR is a valid memory address. 4290 STRICT specifies whether strict register checking applies. */ 4291 4292static bool 4293s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict) 4294{ 4295 struct s390_address ad; 4296 4297 if (TARGET_Z10 4298 && larl_operand (addr, VOIDmode) 4299 && (mode == VOIDmode 4300 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode)))) 4301 return true; 4302 4303 if (!s390_decompose_address (addr, &ad)) 4304 return false; 4305 4306 if (strict) 4307 { 4308 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 4309 return false; 4310 4311 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))) 4312 return false; 4313 } 4314 else 4315 { 4316 if (ad.base 4317 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER 4318 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS)) 4319 return false; 4320 4321 if (ad.indx 4322 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER 4323 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS)) 4324 return false; 4325 } 4326 return true; 4327} 4328 4329/* Return true if OP is a valid operand for the LA instruction. 4330 In 31-bit, we need to prove that the result is used as an 4331 address, as LA performs only a 31-bit addition. 
*/ 4332 4333bool 4334legitimate_la_operand_p (rtx op) 4335{ 4336 struct s390_address addr; 4337 if (!s390_decompose_address (op, &addr)) 4338 return false; 4339 4340 return (TARGET_64BIT || addr.pointer); 4341} 4342 4343/* Return true if it is valid *and* preferable to use LA to 4344 compute the sum of OP1 and OP2. */ 4345 4346bool 4347preferred_la_operand_p (rtx op1, rtx op2) 4348{ 4349 struct s390_address addr; 4350 4351 if (op2 != const0_rtx) 4352 op1 = gen_rtx_PLUS (Pmode, op1, op2); 4353 4354 if (!s390_decompose_address (op1, &addr)) 4355 return false; 4356 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base))) 4357 return false; 4358 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx))) 4359 return false; 4360 4361 /* Avoid LA instructions with index register on z196; it is 4362 preferable to use regular add instructions when possible. 4363 Starting with zEC12 the la with index register is "uncracked" 4364 again. */ 4365 if (addr.indx && s390_tune == PROCESSOR_2817_Z196) 4366 return false; 4367 4368 if (!TARGET_64BIT && !addr.pointer) 4369 return false; 4370 4371 if (addr.pointer) 4372 return true; 4373 4374 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base)) 4375 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx))) 4376 return true; 4377 4378 return false; 4379} 4380 4381/* Emit a forced load-address operation to load SRC into DST. 4382 This will use the LOAD ADDRESS instruction even in situations 4383 where legitimate_la_operand_p (SRC) returns false. */ 4384 4385void 4386s390_load_address (rtx dst, rtx src) 4387{ 4388 if (TARGET_64BIT) 4389 emit_move_insn (dst, src); 4390 else 4391 emit_insn (gen_force_la_31 (dst, src)); 4392} 4393 4394/* Return a legitimate reference for ORIG (an address) using the 4395 register REG. If REG is 0, a new pseudo is generated. 4396 4397 There are two types of references that must be handled: 4398 4399 1. Global data references must load the address from the GOT, via 4400 the PIC reg. An insn is emitted to do this load, and the reg is 4401 returned. 4402 4403 2. Static data references, constant pool addresses, and code labels 4404 compute the address as an offset from the GOT, whose base is in 4405 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to 4406 differentiate them from global data objects. The returned 4407 address is the PIC reg + an unspec constant. 4408 4409 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC 4410 reg also appears in the address. */ 4411 4412rtx 4413legitimize_pic_address (rtx orig, rtx reg) 4414{ 4415 rtx addr = orig; 4416 rtx addend = const0_rtx; 4417 rtx new_rtx = orig; 4418 4419 gcc_assert (!TLS_SYMBOLIC_CONST (addr)); 4420 4421 if (GET_CODE (addr) == CONST) 4422 addr = XEXP (addr, 0); 4423 4424 if (GET_CODE (addr) == PLUS) 4425 { 4426 addend = XEXP (addr, 1); 4427 addr = XEXP (addr, 0); 4428 } 4429 4430 if ((GET_CODE (addr) == LABEL_REF 4431 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)) 4432 || (GET_CODE (addr) == UNSPEC && 4433 (XINT (addr, 1) == UNSPEC_GOTENT 4434 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT)))) 4435 && GET_CODE (addend) == CONST_INT) 4436 { 4437 /* This can be locally addressed. */ 4438 4439 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */ 4440 rtx const_addr = (GET_CODE (addr) == UNSPEC ? 
4441 gen_rtx_CONST (Pmode, addr) : addr); 4442 4443 if (TARGET_CPU_ZARCH 4444 && larl_operand (const_addr, VOIDmode) 4445 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31 4446 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31)) 4447 { 4448 if (INTVAL (addend) & 1) 4449 { 4450 /* LARL can't handle odd offsets, so emit a pair of LARL 4451 and LA. */ 4452 rtx temp = reg? reg : gen_reg_rtx (Pmode); 4453 4454 if (!DISP_IN_RANGE (INTVAL (addend))) 4455 { 4456 HOST_WIDE_INT even = INTVAL (addend) - 1; 4457 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even)); 4458 addr = gen_rtx_CONST (Pmode, addr); 4459 addend = const1_rtx; 4460 } 4461 4462 emit_move_insn (temp, addr); 4463 new_rtx = gen_rtx_PLUS (Pmode, temp, addend); 4464 4465 if (reg != 0) 4466 { 4467 s390_load_address (reg, new_rtx); 4468 new_rtx = reg; 4469 } 4470 } 4471 else 4472 { 4473 /* If the offset is even, we can just use LARL. This 4474 will happen automatically. */ 4475 } 4476 } 4477 else 4478 { 4479 /* No larl - Access local symbols relative to the GOT. */ 4480 4481 rtx temp = reg? reg : gen_reg_rtx (Pmode); 4482 4483 if (reload_in_progress || reload_completed) 4484 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 4485 4486 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 4487 if (addend != const0_rtx) 4488 addr = gen_rtx_PLUS (Pmode, addr, addend); 4489 addr = gen_rtx_CONST (Pmode, addr); 4490 addr = force_const_mem (Pmode, addr); 4491 emit_move_insn (temp, addr); 4492 4493 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); 4494 if (reg != 0) 4495 { 4496 s390_load_address (reg, new_rtx); 4497 new_rtx = reg; 4498 } 4499 } 4500 } 4501 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx) 4502 { 4503 /* A non-local symbol reference without addend. 4504 4505 The symbol ref is wrapped into an UNSPEC to make sure the 4506 proper operand modifier (@GOT or @GOTENT) will be emitted. 4507 This will tell the linker to put the symbol into the GOT. 4508 4509 Additionally the code dereferencing the GOT slot is emitted here. 4510 4511 An addend to the symref needs to be added afterwards. 4512 legitimize_pic_address calls itself recursively to handle 4513 that case. So no need to do it here. */ 4514 4515 if (reg == 0) 4516 reg = gen_reg_rtx (Pmode); 4517 4518 if (TARGET_Z10) 4519 { 4520 /* Use load relative if possible. 4521 lgrl <target>, sym@GOTENT */ 4522 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT); 4523 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4524 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx); 4525 4526 emit_move_insn (reg, new_rtx); 4527 new_rtx = reg; 4528 } 4529 else if (flag_pic == 1) 4530 { 4531 /* Assume GOT offset is a valid displacement operand (< 4k 4532 or < 512k with z990). This is handled the same way in 4533 both 31- and 64-bit code (@GOT). 4534 lg <target>, sym@GOT(r12) */ 4535 4536 if (reload_in_progress || reload_completed) 4537 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 4538 4539 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 4540 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4541 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 4542 new_rtx = gen_const_mem (Pmode, new_rtx); 4543 emit_move_insn (reg, new_rtx); 4544 new_rtx = reg; 4545 } 4546 else if (TARGET_CPU_ZARCH) 4547 { 4548 /* If the GOT offset might be >= 4k, we determine the position 4549 of the GOT entry via a PC-relative LARL (@GOTENT). 4550 larl temp, sym@GOTENT 4551 lg <target>, 0(temp) */ 4552 4553 rtx temp = reg ? 
reg : gen_reg_rtx (Pmode);

	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);

	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  emit_move_insn (temp, new_rtx);

	  new_rtx = gen_const_mem (Pmode, temp);
	  emit_move_insn (reg, new_rtx);

	  new_rtx = reg;
	}
      else
	{
	  /* If the GOT offset might be >= 4k, we have to load it
	     from the literal pool (@GOT).

	     lg temp, lit-litbase(r13)
	     lg <target>, 0(temp)
	     lit: .long sym@GOT  */

	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);

	  if (reload_in_progress || reload_completed)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  addr = gen_rtx_CONST (Pmode, addr);
	  addr = force_const_mem (Pmode, addr);
	  emit_move_insn (temp, addr);

	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
    {
      gcc_assert (XVECLEN (addr, 0) == 1);
      switch (XINT (addr, 1))
	{
	  /* These UNSPECs address symbols (or PLT slots) relative to the
	     GOT (not GOT slots!).  In general this will exceed the
	     displacement range, so these values belong in the literal
	     pool.  */
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    new_rtx = force_const_mem (Pmode, orig);
	    break;

	  /* For -fPIC the GOT size might exceed the displacement
	     range so make sure the value is in the literal pool.  */
	  case UNSPEC_GOT:
	    if (flag_pic == 2)
	      new_rtx = force_const_mem (Pmode, orig);
	    break;

	  /* For @GOTENT larl is used.  This is handled like local
	     symbol refs.  */
	  case UNSPEC_GOTENT:
	    gcc_unreachable ();
	    break;

	  /* @PLT is OK as is on 64-bit, must be converted to
	     GOT-relative @PLTOFF on 31-bit.  */
	  case UNSPEC_PLT:
	    if (!TARGET_CPU_ZARCH)
	      {
		rtx temp = reg ? reg : gen_reg_rtx (Pmode);

		if (reload_in_progress || reload_completed)
		  df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

		addr = XVECEXP (addr, 0, 0);
		addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				       UNSPEC_PLTOFF);
		if (addend != const0_rtx)
		  addr = gen_rtx_PLUS (Pmode, addr, addend);
		addr = gen_rtx_CONST (Pmode, addr);
		addr = force_const_mem (Pmode, addr);
		emit_move_insn (temp, addr);

		new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
		if (reg != 0)
		  {
		    s390_load_address (reg, new_rtx);
		    new_rtx = reg;
		  }
	      }
	    else
	      /* On 64-bit larl can be used.  This case is handled like
		 local symbol refs.  */
	      gcc_unreachable ();
	    break;

	  /* Everything else cannot happen.  */
	  default:
	    gcc_unreachable ();
	}
    }
  else if (addend != const0_rtx)
    {
      /* Otherwise, compute the sum.  */

      rtx base = legitimize_pic_address (addr, reg);
      new_rtx = legitimize_pic_address (addend,
					base == reg ?
NULL_RTX : reg);
      if (GET_CODE (new_rtx) == CONST_INT)
	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
      else
	{
	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
	    {
	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
	      new_rtx = XEXP (new_rtx, 1);
	    }
	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
	}

      if (GET_CODE (new_rtx) == CONST)
	new_rtx = XEXP (new_rtx, 0);
      new_rtx = force_operand (new_rtx, 0);
    }

  return new_rtx;
}

/* Load the thread pointer into a register.  */

rtx
s390_get_thread_pointer (void)
{
  rtx tp = gen_reg_rtx (Pmode);

  emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
  mark_reg_pointer (tp, BITS_PER_WORD);

  return tp;
}

/* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
   in s390_tls_symbol which always refers to __tls_get_offset.
   The returned offset is written to RESULT_REG and a USE rtx is
   generated for TLS_CALL.  */

static GTY(()) rtx s390_tls_symbol;

static void
s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
{
  rtx insn;

  if (!flag_pic)
    emit_insn (s390_load_got ());

  if (!s390_tls_symbol)
    s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");

  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));

  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
  RTL_CONST_CALL_P (insn) = 1;
}

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  REG may be used as a temporary.  */

static rtx
legitimize_tls_address (rtx addr, rtx reg)
{
  rtx new_rtx, tls_call, temp, base, r2, insn;

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (tls_symbolic_operand (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	base = gen_reg_rtx (Pmode);
	s390_load_address (base, new_rtx);

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx =
force_const_mem (Pmode, new_rtx); 4780 temp = gen_reg_rtx (Pmode); 4781 emit_move_insn (temp, new_rtx); 4782 4783 new_rtx = gen_rtx_PLUS (Pmode, base, temp); 4784 if (reg != 0) 4785 { 4786 s390_load_address (reg, new_rtx); 4787 new_rtx = reg; 4788 } 4789 break; 4790 4791 case TLS_MODEL_INITIAL_EXEC: 4792 if (flag_pic == 1) 4793 { 4794 /* Assume GOT offset < 4k. This is handled the same way 4795 in both 31- and 64-bit code. */ 4796 4797 if (reload_in_progress || reload_completed) 4798 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 4799 4800 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); 4801 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4802 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 4803 new_rtx = gen_const_mem (Pmode, new_rtx); 4804 temp = gen_reg_rtx (Pmode); 4805 emit_move_insn (temp, new_rtx); 4806 } 4807 else if (TARGET_CPU_ZARCH) 4808 { 4809 /* If the GOT offset might be >= 4k, we determine the position 4810 of the GOT entry via a PC-relative LARL. */ 4811 4812 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); 4813 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4814 temp = gen_reg_rtx (Pmode); 4815 emit_move_insn (temp, new_rtx); 4816 4817 new_rtx = gen_const_mem (Pmode, temp); 4818 temp = gen_reg_rtx (Pmode); 4819 emit_move_insn (temp, new_rtx); 4820 } 4821 else if (flag_pic) 4822 { 4823 /* If the GOT offset might be >= 4k, we have to load it 4824 from the literal pool. */ 4825 4826 if (reload_in_progress || reload_completed) 4827 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 4828 4829 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); 4830 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4831 new_rtx = force_const_mem (Pmode, new_rtx); 4832 temp = gen_reg_rtx (Pmode); 4833 emit_move_insn (temp, new_rtx); 4834 4835 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); 4836 new_rtx = gen_const_mem (Pmode, new_rtx); 4837 4838 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD); 4839 temp = gen_reg_rtx (Pmode); 4840 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx)); 4841 } 4842 else 4843 { 4844 /* In position-dependent code, load the absolute address of 4845 the GOT entry from the literal pool. 
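
	     A sketch of the resulting sequence, following the pattern
	     of the comments above (assumed 31-bit mnemonics):

	       l	temp, lit-litbase(r13)
	       l	<target>, 0(temp)
	     lit: .long sym@indntpoff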
*/ 4846 4847 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); 4848 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4849 new_rtx = force_const_mem (Pmode, new_rtx); 4850 temp = gen_reg_rtx (Pmode); 4851 emit_move_insn (temp, new_rtx); 4852 4853 new_rtx = temp; 4854 new_rtx = gen_const_mem (Pmode, new_rtx); 4855 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD); 4856 temp = gen_reg_rtx (Pmode); 4857 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx)); 4858 } 4859 4860 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 4861 if (reg != 0) 4862 { 4863 s390_load_address (reg, new_rtx); 4864 new_rtx = reg; 4865 } 4866 break; 4867 4868 case TLS_MODEL_LOCAL_EXEC: 4869 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); 4870 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4871 new_rtx = force_const_mem (Pmode, new_rtx); 4872 temp = gen_reg_rtx (Pmode); 4873 emit_move_insn (temp, new_rtx); 4874 4875 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); 4876 if (reg != 0) 4877 { 4878 s390_load_address (reg, new_rtx); 4879 new_rtx = reg; 4880 } 4881 break; 4882 4883 default: 4884 gcc_unreachable (); 4885 } 4886 4887 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC) 4888 { 4889 switch (XINT (XEXP (addr, 0), 1)) 4890 { 4891 case UNSPEC_INDNTPOFF: 4892 gcc_assert (TARGET_CPU_ZARCH); 4893 new_rtx = addr; 4894 break; 4895 4896 default: 4897 gcc_unreachable (); 4898 } 4899 } 4900 4901 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS 4902 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 4903 { 4904 new_rtx = XEXP (XEXP (addr, 0), 0); 4905 if (GET_CODE (new_rtx) != SYMBOL_REF) 4906 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 4907 4908 new_rtx = legitimize_tls_address (new_rtx, reg); 4909 new_rtx = plus_constant (Pmode, new_rtx, 4910 INTVAL (XEXP (XEXP (addr, 0), 1))); 4911 new_rtx = force_operand (new_rtx, 0); 4912 } 4913 4914 else 4915 gcc_unreachable (); /* for now ... */ 4916 4917 return new_rtx; 4918} 4919 4920/* Emit insns making the address in operands[1] valid for a standard 4921 move to operands[0]. operands[1] is replaced by an address which 4922 should be used instead of the former RTX to emit the move 4923 pattern. */ 4924 4925void 4926emit_symbolic_move (rtx *operands) 4927{ 4928 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); 4929 4930 if (GET_CODE (operands[0]) == MEM) 4931 operands[1] = force_reg (Pmode, operands[1]); 4932 else if (TLS_SYMBOLIC_CONST (operands[1])) 4933 operands[1] = legitimize_tls_address (operands[1], temp); 4934 else if (flag_pic) 4935 operands[1] = legitimize_pic_address (operands[1], temp); 4936} 4937 4938/* Try machine-dependent ways of modifying an illegitimate address X 4939 to be legitimate. If we find one, return the new, valid address. 4940 4941 OLDX is the address as it was before break_out_memory_refs was called. 4942 In some cases it is useful to look at this to decide what needs to be done. 4943 4944 MODE is the mode of the operand pointed to by X. 4945 4946 When -fpic is used, special handling is needed for symbolic references. 4947 See comments by legitimize_pic_address for details. 
*/ 4948 4949static rtx 4950s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 4951 machine_mode mode ATTRIBUTE_UNUSED) 4952{ 4953 rtx constant_term = const0_rtx; 4954 4955 if (TLS_SYMBOLIC_CONST (x)) 4956 { 4957 x = legitimize_tls_address (x, 0); 4958 4959 if (s390_legitimate_address_p (mode, x, FALSE)) 4960 return x; 4961 } 4962 else if (GET_CODE (x) == PLUS 4963 && (TLS_SYMBOLIC_CONST (XEXP (x, 0)) 4964 || TLS_SYMBOLIC_CONST (XEXP (x, 1)))) 4965 { 4966 return x; 4967 } 4968 else if (flag_pic) 4969 { 4970 if (SYMBOLIC_CONST (x) 4971 || (GET_CODE (x) == PLUS 4972 && (SYMBOLIC_CONST (XEXP (x, 0)) 4973 || SYMBOLIC_CONST (XEXP (x, 1))))) 4974 x = legitimize_pic_address (x, 0); 4975 4976 if (s390_legitimate_address_p (mode, x, FALSE)) 4977 return x; 4978 } 4979 4980 x = eliminate_constant_term (x, &constant_term); 4981 4982 /* Optimize loading of large displacements by splitting them 4983 into the multiple of 4K and the rest; this allows the 4984 former to be CSE'd if possible. 4985 4986 Don't do this if the displacement is added to a register 4987 pointing into the stack frame, as the offsets will 4988 change later anyway. */ 4989 4990 if (GET_CODE (constant_term) == CONST_INT 4991 && !TARGET_LONG_DISPLACEMENT 4992 && !DISP_IN_RANGE (INTVAL (constant_term)) 4993 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x)))) 4994 { 4995 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff; 4996 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower; 4997 4998 rtx temp = gen_reg_rtx (Pmode); 4999 rtx val = force_operand (GEN_INT (upper), temp); 5000 if (val != temp) 5001 emit_move_insn (temp, val); 5002 5003 x = gen_rtx_PLUS (Pmode, x, temp); 5004 constant_term = GEN_INT (lower); 5005 } 5006 5007 if (GET_CODE (x) == PLUS) 5008 { 5009 if (GET_CODE (XEXP (x, 0)) == REG) 5010 { 5011 rtx temp = gen_reg_rtx (Pmode); 5012 rtx val = force_operand (XEXP (x, 1), temp); 5013 if (val != temp) 5014 emit_move_insn (temp, val); 5015 5016 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp); 5017 } 5018 5019 else if (GET_CODE (XEXP (x, 1)) == REG) 5020 { 5021 rtx temp = gen_reg_rtx (Pmode); 5022 rtx val = force_operand (XEXP (x, 0), temp); 5023 if (val != temp) 5024 emit_move_insn (temp, val); 5025 5026 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1)); 5027 } 5028 } 5029 5030 if (constant_term != const0_rtx) 5031 x = gen_rtx_PLUS (Pmode, x, constant_term); 5032 5033 return x; 5034} 5035 5036/* Try a machine-dependent way of reloading an illegitimate address AD 5037 operand. If we find one, push the reload and return the new address. 5038 5039 MODE is the mode of the enclosing MEM. OPNUM is the operand number 5040 and TYPE is the reload type of the current reload. 
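   As a worked example (added for illustration): an out-of-range
   displacement of 0x12345 is split below into lower = 0x12345 & 0xfff
   = 0x345, which fits the 12-bit displacement field, and upper =
   0x12345 ^ 0x345 = 0x12000, which gets reloaded into the base.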
 */

rtx
legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
			   int opnum, int type)
{
  if (!optimize || TARGET_LONG_DISPLACEMENT)
    return NULL_RTX;

  if (GET_CODE (ad) == PLUS)
    {
      rtx tem = simplify_binary_operation (PLUS, Pmode,
					   XEXP (ad, 0), XEXP (ad, 1));
      if (tem)
	ad = tem;
    }

  if (GET_CODE (ad) == PLUS
      && GET_CODE (XEXP (ad, 0)) == REG
      && GET_CODE (XEXP (ad, 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
    {
      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
      rtx cst, tem, new_rtx;

      cst = GEN_INT (upper);
      if (!legitimate_reload_constant_p (cst))
	cst = force_const_mem (Pmode, cst);

      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));

      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return new_rtx;
    }

  return NULL_RTX;
}

/* Emit code to move LEN bytes from SRC to DST.  */

bool
s390_expand_movmem (rtx dst, rtx src, rtx len)
{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  We will generate inline code only for
     constant lengths below 64k.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return false;

  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
    {
      if (INTVAL (len) > 0)
	emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
    }

  else if (TARGET_MVCLE)
    {
      emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
    }

  else
    {
      rtx dst_addr, src_addr, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      src_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);
      src = change_address (src, VOIDmode, src_addr);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +3 cache line.
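	     (768 bytes ahead, i.e. three cache lines of 256 bytes --
	     the z10 cache line size; an explanatory note.)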
*/ 5152 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)), 5153 const0_rtx, const0_rtx); 5154 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5155 emit_insn (prefetch); 5156 5157 /* Issue a write prefetch for the +3 cache line. */ 5158 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)), 5159 const1_rtx, const0_rtx); 5160 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5161 emit_insn (prefetch); 5162 } 5163 5164 emit_insn (gen_movmem_short (dst, src, GEN_INT (255))); 5165 s390_load_address (dst_addr, 5166 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); 5167 s390_load_address (src_addr, 5168 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256))); 5169 5170 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 5171 OPTAB_DIRECT); 5172 if (temp != blocks) 5173 emit_move_insn (blocks, temp); 5174 5175 emit_cmp_and_jump_insns (blocks, const0_rtx, 5176 EQ, NULL_RTX, mode, 1, loop_end_label); 5177 5178 emit_jump (loop_start_label); 5179 emit_label (loop_end_label); 5180 5181 emit_insn (gen_movmem_short (dst, src, 5182 convert_to_mode (Pmode, count, 1))); 5183 emit_label (end_label); 5184 } 5185 return true; 5186} 5187 5188/* Emit code to set LEN bytes at DST to VAL. 5189 Make use of clrmem if VAL is zero. */ 5190 5191void 5192s390_expand_setmem (rtx dst, rtx len, rtx val) 5193{ 5194 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0) 5195 return; 5196 5197 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode); 5198 5199 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257) 5200 { 5201 if (val == const0_rtx && INTVAL (len) <= 256) 5202 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1))); 5203 else 5204 { 5205 /* Initialize memory by storing the first byte. */ 5206 emit_move_insn (adjust_address (dst, QImode, 0), val); 5207 5208 if (INTVAL (len) > 1) 5209 { 5210 /* Initiate 1 byte overlap move. 5211 The first byte of DST is propagated through DSTP1. 5212 Prepare a movmem for: DST+1 = DST (length = LEN - 1). 5213 DST is set to size 1 so the rest of the memory location 5214 does not count as source operand. */ 5215 rtx dstp1 = adjust_address (dst, VOIDmode, 1); 5216 set_mem_size (dst, 1); 5217 5218 emit_insn (gen_movmem_short (dstp1, dst, 5219 GEN_INT (INTVAL (len) - 2))); 5220 } 5221 } 5222 } 5223 5224 else if (TARGET_MVCLE) 5225 { 5226 val = force_not_mem (convert_modes (Pmode, QImode, val, 1)); 5227 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val)); 5228 } 5229 5230 else 5231 { 5232 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX; 5233 rtx_code_label *loop_start_label = gen_label_rtx (); 5234 rtx_code_label *loop_end_label = gen_label_rtx (); 5235 rtx_code_label *end_label = gen_label_rtx (); 5236 machine_mode mode; 5237 5238 mode = GET_MODE (len); 5239 if (mode == VOIDmode) 5240 mode = Pmode; 5241 5242 dst_addr = gen_reg_rtx (Pmode); 5243 count = gen_reg_rtx (mode); 5244 blocks = gen_reg_rtx (mode); 5245 5246 convert_move (count, len, 1); 5247 emit_cmp_and_jump_insns (count, const0_rtx, 5248 EQ, NULL_RTX, mode, 1, end_label); 5249 5250 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); 5251 dst = change_address (dst, VOIDmode, dst_addr); 5252 5253 if (val == const0_rtx) 5254 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 5255 OPTAB_DIRECT); 5256 else 5257 { 5258 dstp1 = adjust_address (dst, VOIDmode, 1); 5259 set_mem_size (dst, 1); 5260 5261 /* Initialize memory by storing the first byte. 
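	     Since mvc processes its operands strictly left to right,
	     one byte at a time, the overlapping dst+1 <- dst moves
	     emitted further down replicate this byte through the whole
	     block (the classic mvc memset idiom; an explanatory note).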
	     */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);

	  /* If count is 1 we are done.  */
	  emit_cmp_and_jump_insns (count, const1_rtx,
				   EQ, NULL_RTX, mode, 1, end_label);

	  temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
			       OPTAB_DIRECT);
	}
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
	{
	  /* Issue a write prefetch for the +4 cache line.  */
	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
						     GEN_INT (1024)),
				       const1_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
      else
	emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
      else
	emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);
    }
}

/* Emit code to compare LEN bytes at OP0 with those at OP1,
   and return the result in TARGET.  */

bool
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
  rtx tmp;

  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  We will generate inline code only for
     constant lengths below 64k.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return false;

  /* As the result of CMPINT is inverted compared to what we need,
     we have to swap the operands.
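     E.g. a memcmp-style caller expects a negative result when the
     original OP0 is the smaller operand; comparing the swapped
     operands and then converting the CC with CMPINT produces exactly
     that sign (a note added for clarity).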
*/ 5339 tmp = op0; op0 = op1; op1 = tmp; 5340 5341 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) 5342 { 5343 if (INTVAL (len) > 0) 5344 { 5345 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1))); 5346 emit_insn (gen_cmpint (target, ccreg)); 5347 } 5348 else 5349 emit_move_insn (target, const0_rtx); 5350 } 5351 else if (TARGET_MVCLE) 5352 { 5353 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1))); 5354 emit_insn (gen_cmpint (target, ccreg)); 5355 } 5356 else 5357 { 5358 rtx addr0, addr1, count, blocks, temp; 5359 rtx_code_label *loop_start_label = gen_label_rtx (); 5360 rtx_code_label *loop_end_label = gen_label_rtx (); 5361 rtx_code_label *end_label = gen_label_rtx (); 5362 machine_mode mode; 5363 5364 mode = GET_MODE (len); 5365 if (mode == VOIDmode) 5366 mode = Pmode; 5367 5368 addr0 = gen_reg_rtx (Pmode); 5369 addr1 = gen_reg_rtx (Pmode); 5370 count = gen_reg_rtx (mode); 5371 blocks = gen_reg_rtx (mode); 5372 5373 convert_move (count, len, 1); 5374 emit_cmp_and_jump_insns (count, const0_rtx, 5375 EQ, NULL_RTX, mode, 1, end_label); 5376 5377 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX)); 5378 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX)); 5379 op0 = change_address (op0, VOIDmode, addr0); 5380 op1 = change_address (op1, VOIDmode, addr1); 5381 5382 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 5383 OPTAB_DIRECT); 5384 if (temp != count) 5385 emit_move_insn (count, temp); 5386 5387 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, 5388 OPTAB_DIRECT); 5389 if (temp != blocks) 5390 emit_move_insn (blocks, temp); 5391 5392 emit_cmp_and_jump_insns (blocks, const0_rtx, 5393 EQ, NULL_RTX, mode, 1, loop_end_label); 5394 5395 emit_label (loop_start_label); 5396 5397 if (TARGET_Z10 5398 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512)) 5399 { 5400 rtx prefetch; 5401 5402 /* Issue a read prefetch for the +2 cache line of operand 1. */ 5403 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)), 5404 const0_rtx, const0_rtx); 5405 emit_insn (prefetch); 5406 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; 5407 5408 /* Issue a read prefetch for the +2 cache line of operand 2. 
	     */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
				   gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
      temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
      emit_jump_insn (temp);

      s390_load_address (addr0,
			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
      s390_load_address (addr1,
			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cmpmem_short (op0, op1,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);

      emit_insn (gen_cmpint (target, ccreg));
    }
  return true;
}

/* Emit a conditional jump to LABEL for condition code mask MASK using
   comparison operator COMPARISON.  Return the emitted jump insn.  */

static rtx
s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
{
  rtx temp;

  gcc_assert (comparison == EQ || comparison == NE);
  gcc_assert (mask > 0 && mask < 15);

  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  return emit_jump_insn (temp);
}

/* Emit the instructions to implement strlen of STRING and store the
   result in TARGET.  The string has the known ALIGNMENT.  This
   version uses vector instructions and is therefore not appropriate
   for targets prior to z13.  */

void
s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  int very_likely = REG_BR_PROB_BASE - 1;
  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
  rtx str_reg = gen_reg_rtx (V16QImode);
  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
  rtx str_idx_reg = gen_reg_rtx (Pmode);
  rtx result_reg = gen_reg_rtx (V16QImode);
  rtx is_aligned_label = gen_label_rtx ();
  rtx into_loop_label = NULL_RTX;
  rtx loop_start_label = gen_label_rtx ();
  rtx temp;
  rtx len = gen_reg_rtx (QImode);
  rtx cond;

  s390_load_address (str_addr_base_reg, XEXP (string, 0));
  emit_move_insn (str_idx_reg, const0_rtx);

  if (INTVAL (alignment) < 16)
    {
      /* Check whether the address happens to be aligned properly so
	 we can jump directly to the aligned loop.
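	 As a worked example (added): for a string at address 0x1009
	 the code below computes 15 - (0x1009 & 15) = 6 as the highest
	 index to load, so vll fetches the 7 bytes up to the next
	 16-byte boundary and zero-fills the rest of the vector.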
	 */
      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
					    str_addr_base_reg, GEN_INT (15)),
			       const0_rtx, EQ, NULL_RTX,
			       Pmode, 1, is_aligned_label);

      temp = gen_reg_rtx (Pmode);
      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (temp));
      highest_index_to_load_reg =
	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (highest_index_to_load_reg));
      emit_insn (gen_vllv16qi (str_reg,
		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));

      into_loop_label = gen_label_rtx ();
      s390_emit_jump (into_loop_label, NULL_RTX);
      emit_barrier ();
    }

  emit_label (is_aligned_label);
  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;

  /* Reaching this point we are only performing 16-byte aligned
     loads.  */
  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));

  emit_label (loop_start_label);
  LABEL_NUSES (loop_start_label) = 1;

  /* Load 16 bytes of the string into VR.  */
  emit_move_insn (str_reg,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, str_idx_reg,
					     str_addr_base_reg)));
  if (into_loop_label != NULL_RTX)
    {
      emit_label (into_loop_label);
      LABEL_NUSES (into_loop_label) = 1;
    }

  /* Increment string index by 16 bytes.  */
  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
		str_idx_reg, 1, OPTAB_DIRECT);

  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));

  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
		    REG_BR_PROB, very_likely);
  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));

  /* If the string pointer wasn't aligned we have loaded less than 16
     bytes and the remaining bytes got filled with zeros (by vll).
     Now we have to check whether the resulting index lies within the
     bytes that are actually part of the string.  */

  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
			    highest_index_to_load_reg);
  s390_load_address (highest_index_to_load_reg,
		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
				   const1_rtx));
  if (TARGET_64BIT)
    emit_insn (gen_movdicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));
  else
    emit_insn (gen_movsicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));

  add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
		    very_unlikely);

  expand_binop (Pmode, add_optab, str_idx_reg,
		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
  /* FIXME: len is already zero extended - so avoid the llgcr emitted
     here.
     */
  temp = expand_binop (Pmode, add_optab, str_idx_reg,
		       convert_to_mode (Pmode, len, 1),
		       target, 1, OPTAB_DIRECT);
  if (temp != target)
    emit_move_insn (target, temp);
}

void
s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  rtx temp = gen_reg_rtx (Pmode);
  rtx src_addr = XEXP (src, 0);
  rtx dst_addr = XEXP (dst, 0);
  rtx src_addr_reg = gen_reg_rtx (Pmode);
  rtx dst_addr_reg = gen_reg_rtx (Pmode);
  rtx offset = gen_reg_rtx (Pmode);
  rtx vsrc = gen_reg_rtx (V16QImode);
  rtx vpos = gen_reg_rtx (V16QImode);
  rtx loadlen = gen_reg_rtx (SImode);
  rtx gpos_qi = gen_reg_rtx (QImode);
  rtx gpos = gen_reg_rtx (SImode);
  rtx done_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  rtx exit_label = gen_label_rtx ();
  rtx full_label = gen_label_rtx ();

  /* Perform a quick check for a string-terminating zero within the
     first (at most) 16 bytes and exit early if successful.  */

  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
  /* gpos is the byte index if a zero was found and 16 otherwise.
     So if it is lower than the number of loaded bytes we have a
     hit.  */
  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
			   full_label);
  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));

  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
		      1, OPTAB_DIRECT);
  emit_jump (exit_label);
  emit_barrier ();

  emit_label (full_label);
  LABEL_NUSES (full_label) = 1;

  /* Calculate `offset' so that src + offset points to the last byte
     before the next 16-byte boundary.  */

  /* temp = src_addr & 0xf */
  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
		      1, OPTAB_DIRECT);

  /* offset = 0xf - temp */
  emit_move_insn (offset, GEN_INT (15));
  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
		      1, OPTAB_DIRECT);

  /* Store `offset' bytes in the destination string.  The quick check
     has loaded at least `offset' bytes into vsrc.  */

  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));

  /* Advance to the next byte to be loaded.  */
  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
		      1, OPTAB_DIRECT);

  /* Make sure the addresses are single regs which can be used as a
     base.  */
  emit_move_insn (src_addr_reg, src_addr);
  emit_move_insn (dst_addr_reg, dst_addr);

  /* MAIN LOOP */

  emit_label (loop_label);
  LABEL_NUSES (loop_label) = 1;

  emit_move_insn (vsrc,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));

  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
		    REG_BR_PROB, very_unlikely);

  emit_move_insn (gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
		  vsrc);
  /* offset += 16 */
  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
		      offset, 1, OPTAB_DIRECT);

  emit_jump (loop_label);
  emit_barrier ();

  /* REGULAR EXIT */

  /* We are done.
Add the offset of the zero character to the dst_addr 5675 pointer to get the result. */ 5676 5677 emit_label (done_label); 5678 LABEL_NUSES (done_label) = 1; 5679 5680 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg, 5681 1, OPTAB_DIRECT); 5682 5683 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7))); 5684 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); 5685 5686 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg))); 5687 5688 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result, 5689 1, OPTAB_DIRECT); 5690 5691 /* EARLY EXIT */ 5692 5693 emit_label (exit_label); 5694 LABEL_NUSES (exit_label) = 1; 5695} 5696 5697 5698/* Expand conditional increment or decrement using alc/slb instructions. 5699 Should generate code setting DST to either SRC or SRC + INCREMENT, 5700 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1. 5701 Returns true if successful, false otherwise. 5702 5703 That makes it possible to implement some if-constructs without jumps e.g.: 5704 (borrow = CC0 | CC1 and carry = CC2 | CC3) 5705 unsigned int a, b, c; 5706 if (a < b) c++; -> CCU b > a -> CC2; c += carry; 5707 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow; 5708 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry; 5709 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow; 5710 5711 Checks for EQ and NE with a nonzero value need an additional xor e.g.: 5712 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry; 5713 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow; 5714 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry; 5715 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */ 5716 5717bool 5718s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1, 5719 rtx dst, rtx src, rtx increment) 5720{ 5721 machine_mode cmp_mode; 5722 machine_mode cc_mode; 5723 rtx op_res; 5724 rtx insn; 5725 rtvec p; 5726 int ret; 5727 5728 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode) 5729 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode)) 5730 cmp_mode = SImode; 5731 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode) 5732 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode)) 5733 cmp_mode = DImode; 5734 else 5735 return false; 5736 5737 /* Try ADD LOGICAL WITH CARRY. */ 5738 if (increment == const1_rtx) 5739 { 5740 /* Determine CC mode to use. */ 5741 if (cmp_code == EQ || cmp_code == NE) 5742 { 5743 if (cmp_op1 != const0_rtx) 5744 { 5745 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1, 5746 NULL_RTX, 0, OPTAB_WIDEN); 5747 cmp_op1 = const0_rtx; 5748 } 5749 5750 cmp_code = cmp_code == EQ ? LEU : GTU; 5751 } 5752 5753 if (cmp_code == LTU || cmp_code == LEU) 5754 { 5755 rtx tem = cmp_op0; 5756 cmp_op0 = cmp_op1; 5757 cmp_op1 = tem; 5758 cmp_code = swap_condition (cmp_code); 5759 } 5760 5761 switch (cmp_code) 5762 { 5763 case GTU: 5764 cc_mode = CCUmode; 5765 break; 5766 5767 case GEU: 5768 cc_mode = CCL3mode; 5769 break; 5770 5771 default: 5772 return false; 5773 } 5774 5775 /* Emit comparison instruction pattern. */ 5776 if (!register_operand (cmp_op0, cmp_mode)) 5777 cmp_op0 = force_reg (cmp_mode, cmp_op0); 5778 5779 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM), 5780 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1)); 5781 /* We use insn_invalid_p here to add clobbers if required. 
*/ 5782 ret = insn_invalid_p (emit_insn (insn), false); 5783 gcc_assert (!ret); 5784 5785 /* Emit ALC instruction pattern. */ 5786 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst), 5787 gen_rtx_REG (cc_mode, CC_REGNUM), 5788 const0_rtx); 5789 5790 if (src != const0_rtx) 5791 { 5792 if (!register_operand (src, GET_MODE (dst))) 5793 src = force_reg (GET_MODE (dst), src); 5794 5795 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src); 5796 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx); 5797 } 5798 5799 p = rtvec_alloc (2); 5800 RTVEC_ELT (p, 0) = 5801 gen_rtx_SET (VOIDmode, dst, op_res); 5802 RTVEC_ELT (p, 1) = 5803 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 5804 emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); 5805 5806 return true; 5807 } 5808 5809 /* Try SUBTRACT LOGICAL WITH BORROW. */ 5810 if (increment == constm1_rtx) 5811 { 5812 /* Determine CC mode to use. */ 5813 if (cmp_code == EQ || cmp_code == NE) 5814 { 5815 if (cmp_op1 != const0_rtx) 5816 { 5817 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1, 5818 NULL_RTX, 0, OPTAB_WIDEN); 5819 cmp_op1 = const0_rtx; 5820 } 5821 5822 cmp_code = cmp_code == EQ ? LEU : GTU; 5823 } 5824 5825 if (cmp_code == GTU || cmp_code == GEU) 5826 { 5827 rtx tem = cmp_op0; 5828 cmp_op0 = cmp_op1; 5829 cmp_op1 = tem; 5830 cmp_code = swap_condition (cmp_code); 5831 } 5832 5833 switch (cmp_code) 5834 { 5835 case LEU: 5836 cc_mode = CCUmode; 5837 break; 5838 5839 case LTU: 5840 cc_mode = CCL3mode; 5841 break; 5842 5843 default: 5844 return false; 5845 } 5846 5847 /* Emit comparison instruction pattern. */ 5848 if (!register_operand (cmp_op0, cmp_mode)) 5849 cmp_op0 = force_reg (cmp_mode, cmp_op0); 5850 5851 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM), 5852 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1)); 5853 /* We use insn_invalid_p here to add clobbers if required. */ 5854 ret = insn_invalid_p (emit_insn (insn), false); 5855 gcc_assert (!ret); 5856 5857 /* Emit SLB instruction pattern. */ 5858 if (!register_operand (src, GET_MODE (dst))) 5859 src = force_reg (GET_MODE (dst), src); 5860 5861 op_res = gen_rtx_MINUS (GET_MODE (dst), 5862 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx), 5863 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst), 5864 gen_rtx_REG (cc_mode, CC_REGNUM), 5865 const0_rtx)); 5866 p = rtvec_alloc (2); 5867 RTVEC_ELT (p, 0) = 5868 gen_rtx_SET (VOIDmode, dst, op_res); 5869 RTVEC_ELT (p, 1) = 5870 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 5871 emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); 5872 5873 return true; 5874 } 5875 5876 return false; 5877} 5878 5879/* Expand code for the insv template. Return true if successful. */ 5880 5881bool 5882s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) 5883{ 5884 int bitsize = INTVAL (op1); 5885 int bitpos = INTVAL (op2); 5886 machine_mode mode = GET_MODE (dest); 5887 machine_mode smode; 5888 int smode_bsize, mode_bsize; 5889 rtx op, clobber; 5890 5891 if (bitsize + bitpos > GET_MODE_BITSIZE (mode)) 5892 return false; 5893 5894 /* Generate INSERT IMMEDIATE (IILL et al). */ 5895 /* (set (ze (reg)) (const_int)). 
*/ 5896 if (TARGET_ZARCH 5897 && register_operand (dest, word_mode) 5898 && (bitpos % 16) == 0 5899 && (bitsize % 16) == 0 5900 && const_int_operand (src, VOIDmode)) 5901 { 5902 HOST_WIDE_INT val = INTVAL (src); 5903 int regpos = bitpos + bitsize; 5904 5905 while (regpos > bitpos) 5906 { 5907 machine_mode putmode; 5908 int putsize; 5909 5910 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32)) 5911 putmode = SImode; 5912 else 5913 putmode = HImode; 5914 5915 putsize = GET_MODE_BITSIZE (putmode); 5916 regpos -= putsize; 5917 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, 5918 GEN_INT (putsize), 5919 GEN_INT (regpos)), 5920 gen_int_mode (val, putmode)); 5921 val >>= putsize; 5922 } 5923 gcc_assert (regpos == bitpos); 5924 return true; 5925 } 5926 5927 smode = smallest_mode_for_size (bitsize, MODE_INT); 5928 smode_bsize = GET_MODE_BITSIZE (smode); 5929 mode_bsize = GET_MODE_BITSIZE (mode); 5930 5931 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */ 5932 if (bitpos == 0 5933 && (bitsize % BITS_PER_UNIT) == 0 5934 && MEM_P (dest) 5935 && (register_operand (src, word_mode) 5936 || const_int_operand (src, VOIDmode))) 5937 { 5938 /* Emit standard pattern if possible. */ 5939 if (smode_bsize == bitsize) 5940 { 5941 emit_move_insn (adjust_address (dest, smode, 0), 5942 gen_lowpart (smode, src)); 5943 return true; 5944 } 5945 5946 /* (set (ze (mem)) (const_int)). */ 5947 else if (const_int_operand (src, VOIDmode)) 5948 { 5949 int size = bitsize / BITS_PER_UNIT; 5950 rtx src_mem = adjust_address (force_const_mem (word_mode, src), 5951 BLKmode, 5952 UNITS_PER_WORD - size); 5953 5954 dest = adjust_address (dest, BLKmode, 0); 5955 set_mem_size (dest, size); 5956 s390_expand_movmem (dest, src_mem, GEN_INT (size)); 5957 return true; 5958 } 5959 5960 /* (set (ze (mem)) (reg)). */ 5961 else if (register_operand (src, word_mode)) 5962 { 5963 if (bitsize <= 32) 5964 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1, 5965 const0_rtx), src); 5966 else 5967 { 5968 /* Emit st,stcmh sequence. */ 5969 int stcmh_width = bitsize - 32; 5970 int size = stcmh_width / BITS_PER_UNIT; 5971 5972 emit_move_insn (adjust_address (dest, SImode, size), 5973 gen_lowpart (SImode, src)); 5974 set_mem_size (dest, size); 5975 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, 5976 GEN_INT (stcmh_width), 5977 const0_rtx), 5978 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32))); 5979 } 5980 return true; 5981 } 5982 } 5983 5984 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */ 5985 if ((bitpos % BITS_PER_UNIT) == 0 5986 && (bitsize % BITS_PER_UNIT) == 0 5987 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32) 5988 && MEM_P (src) 5989 && (mode == DImode || mode == SImode) 5990 && register_operand (dest, mode)) 5991 { 5992 /* Emit a strict_low_part pattern if possible. */ 5993 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize) 5994 { 5995 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest)); 5996 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src)); 5997 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 5998 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber))); 5999 return true; 6000 } 6001 6002 /* ??? There are more powerful versions of ICM that are not 6003 completely represented in the md file. */ 6004 } 6005 6006 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). 
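   For illustration (register numbers hypothetical): inserting the low
   eight bits of %r3 into bits 16..23 of %r2 (IBM bit numbering, bit 0
   is the MSB) would come out roughly as

     risbg	%r2,%r3,16,23,40

   i.e. rotate the source left by 40 bits, insert only the selected bit
   range, and leave the remaining target bits unchanged.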
*/ 6007 if (TARGET_Z10 && (mode == DImode || mode == SImode)) 6008 { 6009 machine_mode mode_s = GET_MODE (src); 6010 6011 if (mode_s == VOIDmode) 6012 { 6013 /* Assume const_int etc already in the proper mode. */ 6014 src = force_reg (mode, src); 6015 } 6016 else if (mode_s != mode) 6017 { 6018 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize); 6019 src = force_reg (mode_s, src); 6020 src = gen_lowpart (mode, src); 6021 } 6022 6023 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2), 6024 op = gen_rtx_SET (VOIDmode, op, src); 6025 6026 if (!TARGET_ZEC12) 6027 { 6028 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); 6029 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)); 6030 } 6031 emit_insn (op); 6032 6033 return true; 6034 } 6035 6036 return false; 6037} 6038 6039/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a 6040 register that holds VAL of mode MODE shifted by COUNT bits. */ 6041 6042static inline rtx 6043s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) 6044{ 6045 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)), 6046 NULL_RTX, 1, OPTAB_DIRECT); 6047 return expand_simple_binop (SImode, ASHIFT, val, count, 6048 NULL_RTX, 1, OPTAB_DIRECT); 6049} 6050 6051/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store 6052 the result in TARGET. */ 6053 6054void 6055s390_expand_vec_compare (rtx target, enum rtx_code cond, 6056 rtx cmp_op1, rtx cmp_op2) 6057{ 6058 machine_mode mode = GET_MODE (target); 6059 bool neg_p = false, swap_p = false; 6060 rtx tmp; 6061 6062 if (GET_MODE (cmp_op1) == V2DFmode) 6063 { 6064 switch (cond) 6065 { 6066 /* NE a != b -> !(a == b) */ 6067 case NE: cond = EQ; neg_p = true; break; 6068 /* UNGT a u> b -> !(b >= a) */ 6069 case UNGT: cond = GE; neg_p = true; swap_p = true; break; 6070 /* UNGE a u>= b -> !(b > a) */ 6071 case UNGE: cond = GT; neg_p = true; swap_p = true; break; 6072 /* LE: a <= b -> b >= a */ 6073 case LE: cond = GE; swap_p = true; break; 6074 /* UNLE: a u<= b -> !(a > b) */ 6075 case UNLE: cond = GT; neg_p = true; break; 6076 /* LT: a < b -> b > a */ 6077 case LT: cond = GT; swap_p = true; break; 6078 /* UNLT: a u< b -> !(a >= b) */ 6079 case UNLT: cond = GE; neg_p = true; break; 6080 case UNEQ: 6081 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2)); 6082 return; 6083 case LTGT: 6084 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2)); 6085 return; 6086 case ORDERED: 6087 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2)); 6088 return; 6089 case UNORDERED: 6090 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2)); 6091 return; 6092 default: break; 6093 } 6094 } 6095 else 6096 { 6097 switch (cond) 6098 { 6099 /* NE: a != b -> !(a == b) */ 6100 case NE: cond = EQ; neg_p = true; break; 6101 /* GE: a >= b -> !(b > a) */ 6102 case GE: cond = GT; neg_p = true; swap_p = true; break; 6103 /* GEU: a >= b -> !(b > a) */ 6104 case GEU: cond = GTU; neg_p = true; swap_p = true; break; 6105 /* LE: a <= b -> !(a > b) */ 6106 case LE: cond = GT; neg_p = true; break; 6107 /* LEU: a <= b -> !(a > b) */ 6108 case LEU: cond = GTU; neg_p = true; break; 6109 /* LT: a < b -> b > a */ 6110 case LT: cond = GT; swap_p = true; break; 6111 /* LTU: a < b -> b > a */ 6112 case LTU: cond = GTU; swap_p = true; break; 6113 default: break; 6114 } 6115 } 6116 6117 if (swap_p) 6118 { 6119 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp; 6120 } 6121 6122 emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_fmt_ee (cond, 6123 mode, 6124 cmp_op1, 
cmp_op2))); 6125 if (neg_p) 6126 emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_NOT (mode, target))); 6127} 6128 6129/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into 6130 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the 6131 elements in CMP1 and CMP2 fulfill the comparison. */ 6132void 6133s390_expand_vec_compare_cc (rtx target, enum rtx_code code, 6134 rtx cmp1, rtx cmp2, bool all_p) 6135{ 6136 enum rtx_code new_code = code; 6137 machine_mode cmp_mode, full_cmp_mode, scratch_mode; 6138 rtx tmp_reg = gen_reg_rtx (SImode); 6139 bool swap_p = false; 6140 6141 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT) 6142 { 6143 switch (code) 6144 { 6145 case EQ: cmp_mode = CCVEQmode; break; 6146 case NE: cmp_mode = CCVEQmode; break; 6147 case GT: cmp_mode = CCVHmode; break; 6148 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break; 6149 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break; 6150 case LE: cmp_mode = CCVHmode; new_code = LE; break; 6151 case GTU: cmp_mode = CCVHUmode; break; 6152 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break; 6153 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break; 6154 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break; 6155 default: gcc_unreachable (); 6156 } 6157 scratch_mode = GET_MODE (cmp1); 6158 } 6159 else if (GET_MODE (cmp1) == V2DFmode) 6160 { 6161 switch (code) 6162 { 6163 case EQ: cmp_mode = CCVEQmode; break; 6164 case NE: cmp_mode = CCVEQmode; break; 6165 case GT: cmp_mode = CCVFHmode; break; 6166 case GE: cmp_mode = CCVFHEmode; break; 6167 case UNLE: cmp_mode = CCVFHmode; break; 6168 case UNLT: cmp_mode = CCVFHEmode; break; 6169 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break; 6170 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break; 6171 default: gcc_unreachable (); 6172 } 6173 scratch_mode = V2DImode; 6174 } 6175 else 6176 gcc_unreachable (); 6177 6178 if (!all_p) 6179 switch (cmp_mode) 6180 { 6181 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break; 6182 case CCVHmode: full_cmp_mode = CCVHANYmode; break; 6183 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break; 6184 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break; 6185 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break; 6186 default: gcc_unreachable (); 6187 } 6188 else 6189 /* The modes without ANY match the ALL modes. */ 6190 full_cmp_mode = cmp_mode; 6191 6192 if (swap_p) 6193 { 6194 rtx tmp = cmp2; 6195 cmp2 = cmp1; 6196 cmp1 = tmp; 6197 } 6198 6199 emit_insn (gen_rtx_PARALLEL (VOIDmode, 6200 gen_rtvec (2, gen_rtx_SET (VOIDmode, 6201 gen_rtx_REG (cmp_mode, CC_REGNUM), 6202 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)), 6203 gen_rtx_CLOBBER (VOIDmode, 6204 gen_rtx_SCRATCH (scratch_mode))))); 6205 emit_move_insn (target, const0_rtx); 6206 emit_move_insn (tmp_reg, const1_rtx); 6207 6208 emit_move_insn (target, 6209 gen_rtx_IF_THEN_ELSE (SImode, 6210 gen_rtx_fmt_ee (new_code, VOIDmode, 6211 gen_rtx_REG (full_cmp_mode, CC_REGNUM), 6212 const0_rtx), 6213 target, tmp_reg)); 6214} 6215 6216/* Generate a vector comparison expression loading either elements of 6217 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 6218 and CMP_OP2. */ 6219 6220void 6221s390_expand_vcond (rtx target, rtx then, rtx els, 6222 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2) 6223{ 6224 rtx tmp; 6225 machine_mode result_mode; 6226 rtx result_target; 6227 6228 /* We always use an integral type vector to hold the comparison 6229 result. 
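   Each mask element is all ones if the comparison held for that lane
   and all zeros otherwise; a V2DF compare therefore yields a V2DI mask
   such as { -1, 0 }.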
*/ 6230 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1); 6231 result_target = gen_reg_rtx (result_mode); 6232 6233 /* Alternatively this could be done by reload by lowering the cmp* 6234 predicates. But it appears to be better for scheduling etc. to 6235 have that in early. */ 6236 if (!REG_P (cmp_op1)) 6237 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1); 6238 6239 if (!REG_P (cmp_op2)) 6240 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2); 6241 6242 s390_expand_vec_compare (result_target, cond, 6243 cmp_op1, cmp_op2); 6244 6245 /* If the results are supposed to be either -1 or 0 we are done 6246 since this is what our compare instructions generate anyway. */ 6247 if (all_ones_operand (then, GET_MODE (then)) 6248 && const0_operand (els, GET_MODE (els))) 6249 { 6250 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target), 6251 result_target, 0)); 6252 return; 6253 } 6254 6255 /* Otherwise we will do a vsel afterwards. */ 6256 /* This gets triggered e.g. 6257 with gcc.c-torture/compile/pr53410-1.c */ 6258 if (!REG_P (then)) 6259 then = force_reg (GET_MODE (target), then); 6260 6261 if (!REG_P (els)) 6262 els = force_reg (GET_MODE (target), els); 6263 6264 tmp = gen_rtx_fmt_ee (EQ, VOIDmode, 6265 result_target, 6266 CONST0_RTX (result_mode)); 6267 6268 /* We compared the result against zero above so we have to swap then 6269 and els here. */ 6270 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then); 6271 6272 gcc_assert (GET_MODE (target) == GET_MODE (then)); 6273 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 6274} 6275 6276/* Emit the RTX necessary to initialize the vector TARGET with values 6277 in VALS. */ 6278void 6279s390_expand_vec_init (rtx target, rtx vals) 6280{ 6281 machine_mode mode = GET_MODE (target); 6282 machine_mode inner_mode = GET_MODE_INNER (mode); 6283 int n_elts = GET_MODE_NUNITS (mode); 6284 bool all_same = true, all_regs = true, all_const_int = true; 6285 rtx x; 6286 int i; 6287 6288 for (i = 0; i < n_elts; ++i) 6289 { 6290 x = XVECEXP (vals, 0, i); 6291 6292 if (!CONST_INT_P (x)) 6293 all_const_int = false; 6294 6295 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 6296 all_same = false; 6297 6298 if (!REG_P (x)) 6299 all_regs = false; 6300 } 6301 6302 /* Use vector gen mask or vector gen byte mask if possible. */ 6303 if (all_same && all_const_int 6304 && (XVECEXP (vals, 0, 0) == const0_rtx 6305 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), 6306 NULL, NULL) 6307 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) 6308 { 6309 emit_insn (gen_rtx_SET (VOIDmode, target, 6310 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))); 6311 return; 6312 } 6313 6314 if (all_same) 6315 { 6316 emit_insn (gen_rtx_SET (VOIDmode, target, 6317 gen_rtx_VEC_DUPLICATE (mode, 6318 XVECEXP (vals, 0, 0)))); 6319 return; 6320 } 6321 6322 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode) 6323 { 6324 /* Use vector load pair. */ 6325 emit_insn (gen_rtx_SET (VOIDmode, target, 6326 gen_rtx_VEC_CONCAT (mode, 6327 XVECEXP (vals, 0, 0), 6328 XVECEXP (vals, 0, 1)))); 6329 return; 6330 } 6331 6332 /* We are about to set the vector elements one by one. Zero out the 6333 full register first in order to help the data flow framework to 6334 detect it as full VR set. */ 6335 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); 6336 6337 /* Unfortunately the vec_init expander is not allowed to fail. So 6338 we have to implement the fallback ourselves. 
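   Each element is stored individually via UNSPEC_VEC_SET, which the md
   patterns are expected to map to element-load instructions (e.g. vlvgf
   for SImode elements on z13; the concrete mnemonic is a sketch, not a
   guarantee).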
 */
  for (i = 0; i < n_elts; i++)
    emit_insn (gen_rtx_SET (VOIDmode, target,
			    gen_rtx_UNSPEC (mode,
					    gen_rtvec (3, XVECEXP (vals, 0, i),
						       GEN_INT (i), target),
					    UNSPEC_VEC_SET)));
}

/* Structure to hold the initial parameters for a compare_and_swap operation
   in HImode and QImode.  */

struct alignment_context
{
  rtx memsi;	  /* SI aligned memory location.  */
  rtx shift;	  /* Bit offset with regard to lsb.  */
  rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
  rtx modemaski;  /* ~modemask */
  bool aligned;	  /* True if memory is aligned, false otherwise.  */
};

/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
   structure AC so that the operation can be simplified transparently when
   the memory alignment is known to be at least 32 bits.  MEM is the memory
   location for the actual operation and MODE its mode.  */

static void
init_alignment_context (struct alignment_context *ac, rtx mem,
			machine_mode mode)
{
  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));

  if (ac->aligned)
    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
  else
    {
      /* Alignment is unknown.  */
      rtx byteoffset, addr, align;

      /* Force the address into a register.  */
      addr = force_reg (Pmode, XEXP (mem, 0));

      /* Align it to SImode.  */
      align = expand_simple_binop (Pmode, AND, addr,
				   GEN_INT (-GET_MODE_SIZE (SImode)),
				   NULL_RTX, 1, OPTAB_DIRECT);
      /* Generate MEM.  */
      ac->memsi = gen_rtx_MEM (SImode, align);
      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));

      /* Calculate shiftcount.  */
      byteoffset = expand_simple_binop (Pmode, AND, addr,
					GEN_INT (GET_MODE_SIZE (SImode) - 1),
					NULL_RTX, 1, OPTAB_DIRECT);
      /* As we already have some offset, evaluate the remaining distance.  */
      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
				       NULL_RTX, 1, OPTAB_DIRECT);
    }

  /* Shift is the byte count, but we need the bitcount.  */
  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
				   NULL_RTX, 1, OPTAB_DIRECT);

  /* Calculate masks.  */
  ac->modemask = expand_simple_binop (SImode, ASHIFT,
				      GEN_INT (GET_MODE_MASK (mode)),
				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
				      NULL_RTX, 1);
}

/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
   emit a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
   perform the merge in SEQ2.  */

static rtx
s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
		    machine_mode mode, rtx val, rtx ins)
{
  rtx tmp;

  if (ac->aligned)
    {
      start_sequence ();
      tmp = copy_to_mode_reg (SImode, val);
      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
			    const0_rtx, ins))
	{
	  *seq1 = NULL;
	  *seq2 = get_insns ();
	  end_sequence ();
	  return tmp;
	}
      end_sequence ();
    }

  /* Failed to use insv.  Generate a two part shift and mask.
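   That is, SEQ1 computes tmp = (ins & GET_MODE_MASK (mode)) << shift,
   which can be hoisted out of the CS loop, and SEQ2 performs the merge
   tmp | val inside the loop.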
*/ 6438 start_sequence (); 6439 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift); 6440 *seq1 = get_insns (); 6441 end_sequence (); 6442 6443 start_sequence (); 6444 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT); 6445 *seq2 = get_insns (); 6446 end_sequence (); 6447 6448 return tmp; 6449} 6450 6451/* Expand an atomic compare and swap operation for HImode and QImode. MEM is 6452 the memory location, CMP the old value to compare MEM with and NEW_RTX the 6453 value to set if CMP == MEM. */ 6454 6455void 6456s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem, 6457 rtx cmp, rtx new_rtx, bool is_weak) 6458{ 6459 struct alignment_context ac; 6460 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3; 6461 rtx res = gen_reg_rtx (SImode); 6462 rtx_code_label *csloop = NULL, *csend = NULL; 6463 6464 gcc_assert (MEM_P (mem)); 6465 6466 init_alignment_context (&ac, mem, mode); 6467 6468 /* Load full word. Subsequent loads are performed by CS. */ 6469 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski, 6470 NULL_RTX, 1, OPTAB_DIRECT); 6471 6472 /* Prepare insertions of cmp and new_rtx into the loaded value. When 6473 possible, we try to use insv to make this happen efficiently. If 6474 that fails we'll generate code both inside and outside the loop. */ 6475 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp); 6476 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx); 6477 6478 if (seq0) 6479 emit_insn (seq0); 6480 if (seq1) 6481 emit_insn (seq1); 6482 6483 /* Start CS loop. */ 6484 if (!is_weak) 6485 { 6486 /* Begin assuming success. */ 6487 emit_move_insn (btarget, const1_rtx); 6488 6489 csloop = gen_label_rtx (); 6490 csend = gen_label_rtx (); 6491 emit_label (csloop); 6492 } 6493 6494 /* val = "<mem>00..0<mem>" 6495 * cmp = "00..0<cmp>00..0" 6496 * new = "00..0<new>00..0" 6497 */ 6498 6499 emit_insn (seq2); 6500 emit_insn (seq3); 6501 6502 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv); 6503 if (is_weak) 6504 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1))); 6505 else 6506 { 6507 rtx tmp; 6508 6509 /* Jump to end if we're done (likely?). */ 6510 s390_emit_jump (csend, cc); 6511 6512 /* Check for changes outside mode, and loop internal if so. 6513 Arrange the moves so that the compare is adjacent to the 6514 branch so that we can generate CRJ. */ 6515 tmp = copy_to_reg (val); 6516 force_expand_binop (SImode, and_optab, res, ac.modemaski, val, 6517 1, OPTAB_DIRECT); 6518 cc = s390_emit_compare (NE, val, tmp); 6519 s390_emit_jump (csloop, cc); 6520 6521 /* Failed. */ 6522 emit_move_insn (btarget, const0_rtx); 6523 emit_label (csend); 6524 } 6525 6526 /* Return the correct part of the bitfield. */ 6527 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift, 6528 NULL_RTX, 1, OPTAB_DIRECT), 1); 6529} 6530 6531/* Expand an atomic operation CODE of mode MODE. MEM is the memory location 6532 and VAL the value to play with. If AFTER is true then store the value 6533 MEM holds after the operation, if AFTER is false then store the value MEM 6534 holds before the operation. If TARGET is zero then discard that value, else 6535 store it to TARGET. 
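   For illustration: an __atomic_fetch_add on a halfword ends up here
   (via the md expanders, roughly speaking) as a compare-and-swap loop
   on the containing aligned SImode word, updating only the halfword
   lane of interest.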
*/ 6536 6537void 6538s390_expand_atomic (machine_mode mode, enum rtx_code code, 6539 rtx target, rtx mem, rtx val, bool after) 6540{ 6541 struct alignment_context ac; 6542 rtx cmp; 6543 rtx new_rtx = gen_reg_rtx (SImode); 6544 rtx orig = gen_reg_rtx (SImode); 6545 rtx_code_label *csloop = gen_label_rtx (); 6546 6547 gcc_assert (!target || register_operand (target, VOIDmode)); 6548 gcc_assert (MEM_P (mem)); 6549 6550 init_alignment_context (&ac, mem, mode); 6551 6552 /* Shift val to the correct bit positions. 6553 Preserve "icm", but prevent "ex icm". */ 6554 if (!(ac.aligned && code == SET && MEM_P (val))) 6555 val = s390_expand_mask_and_shift (val, mode, ac.shift); 6556 6557 /* Further preparation insns. */ 6558 if (code == PLUS || code == MINUS) 6559 emit_move_insn (orig, val); 6560 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */ 6561 val = expand_simple_binop (SImode, XOR, val, ac.modemaski, 6562 NULL_RTX, 1, OPTAB_DIRECT); 6563 6564 /* Load full word. Subsequent loads are performed by CS. */ 6565 cmp = force_reg (SImode, ac.memsi); 6566 6567 /* Start CS loop. */ 6568 emit_label (csloop); 6569 emit_move_insn (new_rtx, cmp); 6570 6571 /* Patch new with val at correct position. */ 6572 switch (code) 6573 { 6574 case PLUS: 6575 case MINUS: 6576 val = expand_simple_binop (SImode, code, new_rtx, orig, 6577 NULL_RTX, 1, OPTAB_DIRECT); 6578 val = expand_simple_binop (SImode, AND, val, ac.modemask, 6579 NULL_RTX, 1, OPTAB_DIRECT); 6580 /* FALLTHRU */ 6581 case SET: 6582 if (ac.aligned && MEM_P (val)) 6583 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0, 6584 0, 0, SImode, val); 6585 else 6586 { 6587 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski, 6588 NULL_RTX, 1, OPTAB_DIRECT); 6589 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val, 6590 NULL_RTX, 1, OPTAB_DIRECT); 6591 } 6592 break; 6593 case AND: 6594 case IOR: 6595 case XOR: 6596 new_rtx = expand_simple_binop (SImode, code, new_rtx, val, 6597 NULL_RTX, 1, OPTAB_DIRECT); 6598 break; 6599 case MULT: /* NAND */ 6600 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val, 6601 NULL_RTX, 1, OPTAB_DIRECT); 6602 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask, 6603 NULL_RTX, 1, OPTAB_DIRECT); 6604 break; 6605 default: 6606 gcc_unreachable (); 6607 } 6608 6609 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp, 6610 ac.memsi, cmp, new_rtx)); 6611 6612 /* Return the correct part of the bitfield. */ 6613 if (target) 6614 convert_move (target, expand_simple_binop (SImode, LSHIFTRT, 6615 after ? new_rtx : cmp, ac.shift, 6616 NULL_RTX, 1, OPTAB_DIRECT), 1); 6617} 6618 6619/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 6620 We need to emit DTP-relative relocations. */ 6621 6622static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 6623 6624static void 6625s390_output_dwarf_dtprel (FILE *file, int size, rtx x) 6626{ 6627 switch (size) 6628 { 6629 case 4: 6630 fputs ("\t.long\t", file); 6631 break; 6632 case 8: 6633 fputs ("\t.quad\t", file); 6634 break; 6635 default: 6636 gcc_unreachable (); 6637 } 6638 output_addr_const (file, x); 6639 fputs ("@DTPOFF", file); 6640} 6641 6642/* Return the proper mode for REGNO being represented in the dwarf 6643 unwind table. */ 6644machine_mode 6645s390_dwarf_frame_reg_mode (int regno) 6646{ 6647 machine_mode save_mode = default_dwarf_frame_reg_mode (regno); 6648 6649 /* The rightmost 64 bits of vector registers are call-clobbered. 
*/ 6650 if (GET_MODE_SIZE (save_mode) > 8) 6651 save_mode = DImode; 6652 6653 return save_mode; 6654} 6655 6656#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 6657/* Implement TARGET_MANGLE_TYPE. */ 6658 6659static const char * 6660s390_mangle_type (const_tree type) 6661{ 6662 type = TYPE_MAIN_VARIANT (type); 6663 6664 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 6665 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 6666 return NULL; 6667 6668 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc"; 6669 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools"; 6670 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli"; 6671 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll"; 6672 6673 if (TYPE_MAIN_VARIANT (type) == long_double_type_node 6674 && TARGET_LONG_DOUBLE_128) 6675 return "g"; 6676 6677 /* For all other types, use normal C++ mangling. */ 6678 return NULL; 6679} 6680#endif 6681 6682/* In the name of slightly smaller debug output, and to cater to 6683 general assembler lossage, recognize various UNSPEC sequences 6684 and turn them back into a direct symbol reference. */ 6685 6686static rtx 6687s390_delegitimize_address (rtx orig_x) 6688{ 6689 rtx x, y; 6690 6691 orig_x = delegitimize_mem_from_attrs (orig_x); 6692 x = orig_x; 6693 6694 /* Extract the symbol ref from: 6695 (plus:SI (reg:SI 12 %r12) 6696 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))] 6697 UNSPEC_GOTOFF/PLTOFF))) 6698 and 6699 (plus:SI (reg:SI 12 %r12) 6700 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))] 6701 UNSPEC_GOTOFF/PLTOFF) 6702 (const_int 4 [0x4])))) */ 6703 if (GET_CODE (x) == PLUS 6704 && REG_P (XEXP (x, 0)) 6705 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM 6706 && GET_CODE (XEXP (x, 1)) == CONST) 6707 { 6708 HOST_WIDE_INT offset = 0; 6709 6710 /* The const operand. */ 6711 y = XEXP (XEXP (x, 1), 0); 6712 6713 if (GET_CODE (y) == PLUS 6714 && GET_CODE (XEXP (y, 1)) == CONST_INT) 6715 { 6716 offset = INTVAL (XEXP (y, 1)); 6717 y = XEXP (y, 0); 6718 } 6719 6720 if (GET_CODE (y) == UNSPEC 6721 && (XINT (y, 1) == UNSPEC_GOTOFF 6722 || XINT (y, 1) == UNSPEC_PLTOFF)) 6723 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset); 6724 } 6725 6726 if (GET_CODE (x) != MEM) 6727 return orig_x; 6728 6729 x = XEXP (x, 0); 6730 if (GET_CODE (x) == PLUS 6731 && GET_CODE (XEXP (x, 1)) == CONST 6732 && GET_CODE (XEXP (x, 0)) == REG 6733 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 6734 { 6735 y = XEXP (XEXP (x, 1), 0); 6736 if (GET_CODE (y) == UNSPEC 6737 && XINT (y, 1) == UNSPEC_GOT) 6738 y = XVECEXP (y, 0, 0); 6739 else 6740 return orig_x; 6741 } 6742 else if (GET_CODE (x) == CONST) 6743 { 6744 /* Extract the symbol ref from: 6745 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))] 6746 UNSPEC_PLT/GOTENT))) */ 6747 6748 y = XEXP (x, 0); 6749 if (GET_CODE (y) == UNSPEC 6750 && (XINT (y, 1) == UNSPEC_GOTENT 6751 || XINT (y, 1) == UNSPEC_PLT)) 6752 y = XVECEXP (y, 0, 0); 6753 else 6754 return orig_x; 6755 } 6756 else 6757 return orig_x; 6758 6759 if (GET_MODE (orig_x) != Pmode) 6760 { 6761 if (GET_MODE (orig_x) == BLKmode) 6762 return orig_x; 6763 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode); 6764 if (y == NULL_RTX) 6765 return orig_x; 6766 } 6767 return y; 6768} 6769 6770/* Output operand OP to stdio stream FILE. 6771 OP is an address (register + offset) which is not used to address data; 6772 instead the rightmost bits are interpreted as the value. 
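   For illustration: OP = (plus (reg %r3) (const_int 46)) is printed as
   46(%r3), as used in shift instructions such as "sll %r1,46(%r3)".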
 */

static void
print_shift_count_operand (FILE *file, rtx op)
{
  HOST_WIDE_INT offset;
  rtx base;

  /* Extract base register and offset.  */
  if (!s390_decompose_shift_count (op, &base, &offset))
    gcc_unreachable ();

  /* Sanity check.  */
  if (base)
    {
      gcc_assert (GET_CODE (base) == REG);
      gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
      gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
    }

  /* Offsets are restricted to twelve bits.  */
  fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
  if (base)
    fprintf (file, "(%s)", reg_names[REGNO (base)]);
}

/* Assigns the number of NOP halfwords to be emitted before and after the
   function label to *HW_BEFORE and *HW_AFTER.  Neither pointer may be NULL.
   If hotpatching is disabled for the function, both values are set to
   zero.  */

static void
s390_function_num_hotpatch_hw (tree decl,
			       int *hw_before,
			       int *hw_after)
{
  tree attr;

  attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));

  /* Handle the arguments of the hotpatch attribute.  The values
     specified via attribute might override the cmdline argument
     values.  */
  if (attr)
    {
      tree args = TREE_VALUE (attr);

      *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
      *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
    }
  else
    {
      /* Use the values specified by the cmdline arguments.  */
      *hw_before = s390_hotpatch_hw_before_label;
      *hw_after = s390_hotpatch_hw_after_label;
    }
}

/* Write the extra assembler code needed to declare a function properly.  */

void
s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  int hw_before, hw_after;

  s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
  if (hw_before > 0)
    {
      unsigned int function_alignment;
      int i;

      /* Add a trampoline code area before the function label and initialize
	 it with two-byte nop instructions.  This area can be overwritten
	 with code that jumps to a patched version of the function.  */
      asm_fprintf (asm_out_file, "\tnopr\t%%r7"
		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
		   hw_before);
      for (i = 1; i < hw_before; i++)
	fputs ("\tnopr\t%r7\n", asm_out_file);

      /* Note: The function label must be aligned so that (a) the bytes of the
	 following nop do not cross a cacheline boundary, and (b) a jump
	 address (eight bytes for 64-bit targets, four bytes for 32-bit
	 targets) can be stored directly before the label without crossing a
	 cacheline boundary.  All this is necessary to make sure the
	 trampoline code can be changed atomically.
	 This alignment is done automatically using the FUNCTION_BOUNDARY
	 macro, but if there are NOPs before the function label, the
	 alignment is placed before them.  So it is necessary to duplicate
	 the alignment after the NOPs.  */
      function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
      if (!
DECL_USER_ALIGN (decl)) 6865 function_alignment = MAX (function_alignment, 6866 (unsigned int) align_functions); 6867 fputs ("\t# alignment for hotpatch\n", asm_out_file); 6868 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment)); 6869 } 6870 6871 ASM_OUTPUT_LABEL (asm_out_file, fname); 6872 if (hw_after > 0) 6873 asm_fprintf (asm_out_file, 6874 "\t# post-label NOPs for hotpatch (%d halfwords)\n", 6875 hw_after); 6876} 6877 6878/* Output machine-dependent UNSPECs occurring in address constant X 6879 in assembler syntax to stdio stream FILE. Returns true if the 6880 constant X could be recognized, false otherwise. */ 6881 6882static bool 6883s390_output_addr_const_extra (FILE *file, rtx x) 6884{ 6885 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1) 6886 switch (XINT (x, 1)) 6887 { 6888 case UNSPEC_GOTENT: 6889 output_addr_const (file, XVECEXP (x, 0, 0)); 6890 fprintf (file, "@GOTENT"); 6891 return true; 6892 case UNSPEC_GOT: 6893 output_addr_const (file, XVECEXP (x, 0, 0)); 6894 fprintf (file, "@GOT"); 6895 return true; 6896 case UNSPEC_GOTOFF: 6897 output_addr_const (file, XVECEXP (x, 0, 0)); 6898 fprintf (file, "@GOTOFF"); 6899 return true; 6900 case UNSPEC_PLT: 6901 output_addr_const (file, XVECEXP (x, 0, 0)); 6902 fprintf (file, "@PLT"); 6903 return true; 6904 case UNSPEC_PLTOFF: 6905 output_addr_const (file, XVECEXP (x, 0, 0)); 6906 fprintf (file, "@PLTOFF"); 6907 return true; 6908 case UNSPEC_TLSGD: 6909 output_addr_const (file, XVECEXP (x, 0, 0)); 6910 fprintf (file, "@TLSGD"); 6911 return true; 6912 case UNSPEC_TLSLDM: 6913 assemble_name (file, get_some_local_dynamic_name ()); 6914 fprintf (file, "@TLSLDM"); 6915 return true; 6916 case UNSPEC_DTPOFF: 6917 output_addr_const (file, XVECEXP (x, 0, 0)); 6918 fprintf (file, "@DTPOFF"); 6919 return true; 6920 case UNSPEC_NTPOFF: 6921 output_addr_const (file, XVECEXP (x, 0, 0)); 6922 fprintf (file, "@NTPOFF"); 6923 return true; 6924 case UNSPEC_GOTNTPOFF: 6925 output_addr_const (file, XVECEXP (x, 0, 0)); 6926 fprintf (file, "@GOTNTPOFF"); 6927 return true; 6928 case UNSPEC_INDNTPOFF: 6929 output_addr_const (file, XVECEXP (x, 0, 0)); 6930 fprintf (file, "@INDNTPOFF"); 6931 return true; 6932 } 6933 6934 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2) 6935 switch (XINT (x, 1)) 6936 { 6937 case UNSPEC_POOL_OFFSET: 6938 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1)); 6939 output_addr_const (file, x); 6940 return true; 6941 } 6942 return false; 6943} 6944 6945/* Output address operand ADDR in assembler syntax to 6946 stdio stream FILE. 
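   The output follows the usual D(X,B) convention: a base+index+disp
   address prints as e.g. 8(%r3,%r2), a base-only address as 8(%r2),
   and a plain displacement as a bare number.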
 */

void
print_operand_address (FILE *file, rtx addr)
{
  struct s390_address ad;

  if (s390_loadrelative_operand_p (addr, NULL, NULL))
    {
      if (!TARGET_Z10)
	{
	  output_operand_lossage ("symbolic memory references are "
				  "only supported on z10 or later");
	  return;
	}
      output_addr_const (file, addr);
      return;
    }

  if (!s390_decompose_address (addr, &ad)
      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
    output_operand_lossage ("cannot decompose address");

  if (ad.disp)
    output_addr_const (file, ad.disp);
  else
    fprintf (file, "0");

  if (ad.base && ad.indx)
    fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
	     reg_names[REGNO (ad.base)]);
  else if (ad.base)
    fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
}

/* Output operand X in assembler syntax to stdio stream FILE.
   CODE specifies the format flag.  The following format flags
   are recognized:

   'C': print opcode suffix for branch condition.
   'D': print opcode suffix for inverse branch condition.
   'E': print opcode suffix for branch on index instruction.
   'G': print the size of the operand in bytes.
   'J': print tls_load/tls_gdcall/tls_ldcall suffix.
   'M': print the second word of a TImode operand.
   'N': print the second word of a DImode operand.
   'O': print only the displacement of a memory reference or address.
   'R': print only the base register of a memory reference or address.
   'S': print S-type memory reference (base+displacement).
   'Y': print shift count operand.

   'b': print integer X as if it's an unsigned byte.
   'c': print integer X as if it's a signed byte.
   'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
   'f': "end" of contiguous bitmask X in SImode.
   'h': print integer X as if it's a signed halfword.
   'i': print the first nonzero HImode part of X.
   'j': print the first HImode part unequal to -1 of X.
   'k': print the first nonzero SImode part of X.
   'm': print the first SImode part unequal to -1 of X.
   'o': print integer X as if it's an unsigned 32-bit word.
   's': "start" of contiguous bitmask X in either DImode or vector inner mode.
   't': CONST_INT: "start" of contiguous bitmask X in SImode.
	CONST_VECTOR: Generate a bitmask for vgbm instruction.
   'x': print integer X as if it's an unsigned halfword.
   'v': print register number as vector register (v1 instead of f1).
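   For illustration: '%x' applied to (const_int -1) prints 65535, and
   '%N' applied to a DImode register pair names the second register of
   the pair.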
7013*/ 7014 7015void 7016print_operand (FILE *file, rtx x, int code) 7017{ 7018 HOST_WIDE_INT ival; 7019 7020 switch (code) 7021 { 7022 case 'C': 7023 fprintf (file, s390_branch_condition_mnemonic (x, FALSE)); 7024 return; 7025 7026 case 'D': 7027 fprintf (file, s390_branch_condition_mnemonic (x, TRUE)); 7028 return; 7029 7030 case 'E': 7031 if (GET_CODE (x) == LE) 7032 fprintf (file, "l"); 7033 else if (GET_CODE (x) == GT) 7034 fprintf (file, "h"); 7035 else 7036 output_operand_lossage ("invalid comparison operator " 7037 "for 'E' output modifier"); 7038 return; 7039 7040 case 'J': 7041 if (GET_CODE (x) == SYMBOL_REF) 7042 { 7043 fprintf (file, "%s", ":tls_load:"); 7044 output_addr_const (file, x); 7045 } 7046 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD) 7047 { 7048 fprintf (file, "%s", ":tls_gdcall:"); 7049 output_addr_const (file, XVECEXP (x, 0, 0)); 7050 } 7051 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM) 7052 { 7053 fprintf (file, "%s", ":tls_ldcall:"); 7054 const char *name = get_some_local_dynamic_name (); 7055 gcc_assert (name); 7056 assemble_name (file, name); 7057 } 7058 else 7059 output_operand_lossage ("invalid reference for 'J' output modifier"); 7060 return; 7061 7062 case 'G': 7063 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x))); 7064 return; 7065 7066 case 'O': 7067 { 7068 struct s390_address ad; 7069 int ret; 7070 7071 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); 7072 7073 if (!ret 7074 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7075 || ad.indx) 7076 { 7077 output_operand_lossage ("invalid address for 'O' output modifier"); 7078 return; 7079 } 7080 7081 if (ad.disp) 7082 output_addr_const (file, ad.disp); 7083 else 7084 fprintf (file, "0"); 7085 } 7086 return; 7087 7088 case 'R': 7089 { 7090 struct s390_address ad; 7091 int ret; 7092 7093 ret = s390_decompose_address (MEM_P (x) ? 
XEXP (x, 0) : x, &ad); 7094 7095 if (!ret 7096 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7097 || ad.indx) 7098 { 7099 output_operand_lossage ("invalid address for 'R' output modifier"); 7100 return; 7101 } 7102 7103 if (ad.base) 7104 fprintf (file, "%s", reg_names[REGNO (ad.base)]); 7105 else 7106 fprintf (file, "0"); 7107 } 7108 return; 7109 7110 case 'S': 7111 { 7112 struct s390_address ad; 7113 int ret; 7114 7115 if (!MEM_P (x)) 7116 { 7117 output_operand_lossage ("memory reference expected for " 7118 "'S' output modifier"); 7119 return; 7120 } 7121 ret = s390_decompose_address (XEXP (x, 0), &ad); 7122 7123 if (!ret 7124 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) 7125 || ad.indx) 7126 { 7127 output_operand_lossage ("invalid address for 'S' output modifier"); 7128 return; 7129 } 7130 7131 if (ad.disp) 7132 output_addr_const (file, ad.disp); 7133 else 7134 fprintf (file, "0"); 7135 7136 if (ad.base) 7137 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]); 7138 } 7139 return; 7140 7141 case 'N': 7142 if (GET_CODE (x) == REG) 7143 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1); 7144 else if (GET_CODE (x) == MEM) 7145 x = change_address (x, VOIDmode, 7146 plus_constant (Pmode, XEXP (x, 0), 4)); 7147 else 7148 output_operand_lossage ("register or memory expression expected " 7149 "for 'N' output modifier"); 7150 break; 7151 7152 case 'M': 7153 if (GET_CODE (x) == REG) 7154 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1); 7155 else if (GET_CODE (x) == MEM) 7156 x = change_address (x, VOIDmode, 7157 plus_constant (Pmode, XEXP (x, 0), 8)); 7158 else 7159 output_operand_lossage ("register or memory expression expected " 7160 "for 'M' output modifier"); 7161 break; 7162 7163 case 'Y': 7164 print_shift_count_operand (file, x); 7165 return; 7166 } 7167 7168 switch (GET_CODE (x)) 7169 { 7170 case REG: 7171 /* Print FP regs as fx instead of vx when they are accessed 7172 through non-vector mode. */ 7173 if (code == 'v' 7174 || VECTOR_NOFP_REG_P (x) 7175 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x))) 7176 || (VECTOR_REG_P (x) 7177 && (GET_MODE_SIZE (GET_MODE (x)) / 7178 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8)) 7179 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2); 7180 else 7181 fprintf (file, "%s", reg_names[REGNO (x)]); 7182 break; 7183 7184 case MEM: 7185 output_address (XEXP (x, 0)); 7186 break; 7187 7188 case CONST: 7189 case CODE_LABEL: 7190 case LABEL_REF: 7191 case SYMBOL_REF: 7192 output_addr_const (file, x); 7193 break; 7194 7195 case CONST_INT: 7196 ival = INTVAL (x); 7197 switch (code) 7198 { 7199 case 0: 7200 break; 7201 case 'b': 7202 ival &= 0xff; 7203 break; 7204 case 'c': 7205 ival = ((ival & 0xff) ^ 0x80) - 0x80; 7206 break; 7207 case 'x': 7208 ival &= 0xffff; 7209 break; 7210 case 'h': 7211 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000; 7212 break; 7213 case 'i': 7214 ival = s390_extract_part (x, HImode, 0); 7215 break; 7216 case 'j': 7217 ival = s390_extract_part (x, HImode, -1); 7218 break; 7219 case 'k': 7220 ival = s390_extract_part (x, SImode, 0); 7221 break; 7222 case 'm': 7223 ival = s390_extract_part (x, SImode, -1); 7224 break; 7225 case 'o': 7226 ival &= 0xffffffff; 7227 break; 7228 case 'e': case 'f': 7229 case 's': case 't': 7230 { 7231 int pos, len; 7232 bool ok; 7233 7234 len = (code == 's' || code == 'e' ? 
			  64 : 32);
	    ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
	    gcc_assert (ok);
	    if (code == 's' || code == 't')
	      ival = 64 - pos - len;
	    else
	      ival = 64 - 1 - pos;
	  }
	  break;
	default:
	  output_operand_lossage ("invalid constant for output modifier '%c'",
				  code);
	}
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
      break;

    case CONST_DOUBLE:
      gcc_assert (GET_MODE (x) == VOIDmode);
      if (code == 'b')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
      else if (code == 'x')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
      else if (code == 'h')
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
      else
	{
	  if (code == 0)
	    output_operand_lossage ("invalid constant - try using "
				    "an output modifier");
	  else
	    output_operand_lossage ("invalid constant for output modifier '%c'",
				    code);
	}
      break;

    case CONST_VECTOR:
      switch (code)
	{
	case 'h':
	  gcc_assert (s390_const_vec_duplicate_p (x));
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
	  break;
	case 'e':
	case 's':
	  {
	    int start, stop, inner_len;
	    bool ok;

	    inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
	    ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
	    gcc_assert (ok);
	    if (code == 's' || code == 't')
	      ival = inner_len - stop - 1;
	    else
	      ival = inner_len - start - 1;
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
	  }
	  break;
	case 't':
	  {
	    unsigned mask;
	    bool ok = s390_bytemask_vector_p (x, &mask);
	    gcc_assert (ok);
	    fprintf (file, "%u", mask);
	  }
	  break;

	default:
	  output_operand_lossage ("invalid constant vector for output "
				  "modifier '%c'", code);
	}
      break;

    default:
      if (code == 0)
	output_operand_lossage ("invalid expression - try using "
				"an output modifier");
      else
	output_operand_lossage ("invalid expression for output "
				"modifier '%c'", code);
      break;
    }
}

/* Target hook for assembling integer objects.  We need to define it
   here to work around a bug in some versions of GAS, which couldn't
   handle values smaller than INT_MIN when printed in decimal.  */

static bool
s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == 8 && aligned_p
      && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
    {
      fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
	       INTVAL (x));
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}

/* Returns true if register REGNO is used for forming
   a memory address in expression X.
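   For example, with X = (set (reg 1) (mem (plus (reg 2) (reg 3)))),
   registers 2 and 3 are used to form the address while register 1 is
   not.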
*/ 7337 7338static bool 7339reg_used_in_mem_p (int regno, rtx x) 7340{ 7341 enum rtx_code code = GET_CODE (x); 7342 int i, j; 7343 const char *fmt; 7344 7345 if (code == MEM) 7346 { 7347 if (refers_to_regno_p (regno, XEXP (x, 0))) 7348 return true; 7349 } 7350 else if (code == SET 7351 && GET_CODE (SET_DEST (x)) == PC) 7352 { 7353 if (refers_to_regno_p (regno, SET_SRC (x))) 7354 return true; 7355 } 7356 7357 fmt = GET_RTX_FORMAT (code); 7358 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 7359 { 7360 if (fmt[i] == 'e' 7361 && reg_used_in_mem_p (regno, XEXP (x, i))) 7362 return true; 7363 7364 else if (fmt[i] == 'E') 7365 for (j = 0; j < XVECLEN (x, i); j++) 7366 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j))) 7367 return true; 7368 } 7369 return false; 7370} 7371 7372/* Returns true if expression DEP_RTX sets an address register 7373 used by instruction INSN to address memory. */ 7374 7375static bool 7376addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn) 7377{ 7378 rtx target, pat; 7379 7380 if (NONJUMP_INSN_P (dep_rtx)) 7381 dep_rtx = PATTERN (dep_rtx); 7382 7383 if (GET_CODE (dep_rtx) == SET) 7384 { 7385 target = SET_DEST (dep_rtx); 7386 if (GET_CODE (target) == STRICT_LOW_PART) 7387 target = XEXP (target, 0); 7388 while (GET_CODE (target) == SUBREG) 7389 target = SUBREG_REG (target); 7390 7391 if (GET_CODE (target) == REG) 7392 { 7393 int regno = REGNO (target); 7394 7395 if (s390_safe_attr_type (insn) == TYPE_LA) 7396 { 7397 pat = PATTERN (insn); 7398 if (GET_CODE (pat) == PARALLEL) 7399 { 7400 gcc_assert (XVECLEN (pat, 0) == 2); 7401 pat = XVECEXP (pat, 0, 0); 7402 } 7403 gcc_assert (GET_CODE (pat) == SET); 7404 return refers_to_regno_p (regno, SET_SRC (pat)); 7405 } 7406 else if (get_attr_atype (insn) == ATYPE_AGEN) 7407 return reg_used_in_mem_p (regno, PATTERN (insn)); 7408 } 7409 } 7410 return false; 7411} 7412 7413/* Return 1, if dep_insn sets register used in insn in the agen unit. */ 7414 7415int 7416s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn) 7417{ 7418 rtx dep_rtx = PATTERN (dep_insn); 7419 int i; 7420 7421 if (GET_CODE (dep_rtx) == SET 7422 && addr_generation_dependency_p (dep_rtx, insn)) 7423 return 1; 7424 else if (GET_CODE (dep_rtx) == PARALLEL) 7425 { 7426 for (i = 0; i < XVECLEN (dep_rtx, 0); i++) 7427 { 7428 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn)) 7429 return 1; 7430 } 7431 } 7432 return 0; 7433} 7434 7435 7436/* A C statement (sans semicolon) to update the integer scheduling priority 7437 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier, 7438 reduce the priority to execute INSN later. Do not define this macro if 7439 you do not need to adjust the scheduling priorities of insns. 7440 7441 A STD instruction should be scheduled earlier, 7442 in order to use the bypass. */ 7443static int 7444s390_adjust_priority (rtx_insn *insn, int priority) 7445{ 7446 if (! 
INSN_P (insn)) 7447 return priority; 7448 7449 if (s390_tune != PROCESSOR_2084_Z990 7450 && s390_tune != PROCESSOR_2094_Z9_109 7451 && s390_tune != PROCESSOR_2097_Z10 7452 && s390_tune != PROCESSOR_2817_Z196 7453 && s390_tune != PROCESSOR_2827_ZEC12 7454 && s390_tune != PROCESSOR_2964_Z13) 7455 return priority; 7456 7457 switch (s390_safe_attr_type (insn)) 7458 { 7459 case TYPE_FSTOREDF: 7460 case TYPE_FSTORESF: 7461 priority = priority << 3; 7462 break; 7463 case TYPE_STORE: 7464 case TYPE_STM: 7465 priority = priority << 1; 7466 break; 7467 default: 7468 break; 7469 } 7470 return priority; 7471} 7472 7473 7474/* The number of instructions that can be issued per cycle. */ 7475 7476static int 7477s390_issue_rate (void) 7478{ 7479 switch (s390_tune) 7480 { 7481 case PROCESSOR_2084_Z990: 7482 case PROCESSOR_2094_Z9_109: 7483 case PROCESSOR_2817_Z196: 7484 return 3; 7485 case PROCESSOR_2097_Z10: 7486 return 2; 7487 /* Starting with EC12 we use the sched_reorder hook to take care 7488 of instruction dispatch constraints. The algorithm only 7489 picks the best instruction and assumes only a single 7490 instruction gets issued per cycle. */ 7491 case PROCESSOR_2827_ZEC12: 7492 default: 7493 return 1; 7494 } 7495} 7496 7497static int 7498s390_first_cycle_multipass_dfa_lookahead (void) 7499{ 7500 return 4; 7501} 7502 7503/* Annotate every literal pool reference in X by an UNSPEC_LTREF expression. 7504 Fix up MEMs as required. */ 7505 7506static void 7507annotate_constant_pool_refs (rtx *x) 7508{ 7509 int i, j; 7510 const char *fmt; 7511 7512 gcc_assert (GET_CODE (*x) != SYMBOL_REF 7513 || !CONSTANT_POOL_ADDRESS_P (*x)); 7514 7515 /* Literal pool references can only occur inside a MEM ... */ 7516 if (GET_CODE (*x) == MEM) 7517 { 7518 rtx memref = XEXP (*x, 0); 7519 7520 if (GET_CODE (memref) == SYMBOL_REF 7521 && CONSTANT_POOL_ADDRESS_P (memref)) 7522 { 7523 rtx base = cfun->machine->base_reg; 7524 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base), 7525 UNSPEC_LTREF); 7526 7527 *x = replace_equiv_address (*x, addr); 7528 return; 7529 } 7530 7531 if (GET_CODE (memref) == CONST 7532 && GET_CODE (XEXP (memref, 0)) == PLUS 7533 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT 7534 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF 7535 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0))) 7536 { 7537 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1)); 7538 rtx sym = XEXP (XEXP (memref, 0), 0); 7539 rtx base = cfun->machine->base_reg; 7540 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base), 7541 UNSPEC_LTREF); 7542 7543 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off)); 7544 return; 7545 } 7546 } 7547 7548 /* ... or a load-address type pattern. 
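   In both cases the (symbol_ref) is wrapped into
   (unspec [(symbol_ref) (base)] UNSPEC_LTREF), making the dependency
   on the literal pool base register explicit.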
*/ 7549 if (GET_CODE (*x) == SET) 7550 { 7551 rtx addrref = SET_SRC (*x); 7552 7553 if (GET_CODE (addrref) == SYMBOL_REF 7554 && CONSTANT_POOL_ADDRESS_P (addrref)) 7555 { 7556 rtx base = cfun->machine->base_reg; 7557 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base), 7558 UNSPEC_LTREF); 7559 7560 SET_SRC (*x) = addr; 7561 return; 7562 } 7563 7564 if (GET_CODE (addrref) == CONST 7565 && GET_CODE (XEXP (addrref, 0)) == PLUS 7566 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT 7567 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF 7568 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0))) 7569 { 7570 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1)); 7571 rtx sym = XEXP (XEXP (addrref, 0), 0); 7572 rtx base = cfun->machine->base_reg; 7573 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base), 7574 UNSPEC_LTREF); 7575 7576 SET_SRC (*x) = plus_constant (Pmode, addr, off); 7577 return; 7578 } 7579 } 7580 7581 /* Annotate LTREL_BASE as well. */ 7582 if (GET_CODE (*x) == UNSPEC 7583 && XINT (*x, 1) == UNSPEC_LTREL_BASE) 7584 { 7585 rtx base = cfun->machine->base_reg; 7586 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base), 7587 UNSPEC_LTREL_BASE); 7588 return; 7589 } 7590 7591 fmt = GET_RTX_FORMAT (GET_CODE (*x)); 7592 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) 7593 { 7594 if (fmt[i] == 'e') 7595 { 7596 annotate_constant_pool_refs (&XEXP (*x, i)); 7597 } 7598 else if (fmt[i] == 'E') 7599 { 7600 for (j = 0; j < XVECLEN (*x, i); j++) 7601 annotate_constant_pool_refs (&XVECEXP (*x, i, j)); 7602 } 7603 } 7604} 7605 7606/* Split all branches that exceed the maximum distance. 7607 Returns true if this created a new literal pool entry. */ 7608 7609static int 7610s390_split_branches (void) 7611{ 7612 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); 7613 int new_literal = 0, ret; 7614 rtx_insn *insn; 7615 rtx pat, target; 7616 rtx *label; 7617 7618 /* We need correct insn addresses. */ 7619 7620 shorten_branches (get_insns ()); 7621 7622 /* Find all branches that exceed 64KB, and split them. */ 7623 7624 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7625 { 7626 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL)) 7627 continue; 7628 7629 pat = PATTERN (insn); 7630 if (GET_CODE (pat) == PARALLEL) 7631 pat = XVECEXP (pat, 0, 0); 7632 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx) 7633 continue; 7634 7635 if (GET_CODE (SET_SRC (pat)) == LABEL_REF) 7636 { 7637 label = &SET_SRC (pat); 7638 } 7639 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE) 7640 { 7641 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF) 7642 label = &XEXP (SET_SRC (pat), 1); 7643 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF) 7644 label = &XEXP (SET_SRC (pat), 2); 7645 else 7646 continue; 7647 } 7648 else 7649 continue; 7650 7651 if (get_attr_length (insn) <= 4) 7652 continue; 7653 7654 /* We are going to use the return register as scratch register, 7655 make sure it will be saved/restored by the prologue/epilogue. 
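   (The branch target is loaded into RETURN_REGNUM from the literal
   pool and the jump is then redirected through that register.)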
*/ 7656 cfun_frame_layout.save_return_addr_p = 1; 7657 7658 if (!flag_pic) 7659 { 7660 new_literal = 1; 7661 rtx mem = force_const_mem (Pmode, *label); 7662 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, mem), insn); 7663 INSN_ADDRESSES_NEW (set_insn, -1); 7664 annotate_constant_pool_refs (&PATTERN (set_insn)); 7665 7666 target = temp_reg; 7667 } 7668 else 7669 { 7670 new_literal = 1; 7671 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label), 7672 UNSPEC_LTREL_OFFSET); 7673 target = gen_rtx_CONST (Pmode, target); 7674 target = force_const_mem (Pmode, target); 7675 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn); 7676 INSN_ADDRESSES_NEW (set_insn, -1); 7677 annotate_constant_pool_refs (&PATTERN (set_insn)); 7678 7679 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0), 7680 cfun->machine->base_reg), 7681 UNSPEC_LTREL_BASE); 7682 target = gen_rtx_PLUS (Pmode, temp_reg, target); 7683 } 7684 7685 ret = validate_change (insn, label, target, 0); 7686 gcc_assert (ret); 7687 } 7688 7689 return new_literal; 7690} 7691 7692 7693/* Find an annotated literal pool symbol referenced in RTX X, 7694 and store it at REF. Will abort if X contains references to 7695 more than one such pool symbol; multiple references to the same 7696 symbol are allowed, however. 7697 7698 The rtx pointed to by REF must be initialized to NULL_RTX 7699 by the caller before calling this routine. */ 7700 7701static void 7702find_constant_pool_ref (rtx x, rtx *ref) 7703{ 7704 int i, j; 7705 const char *fmt; 7706 7707 /* Ignore LTREL_BASE references. */ 7708 if (GET_CODE (x) == UNSPEC 7709 && XINT (x, 1) == UNSPEC_LTREL_BASE) 7710 return; 7711 /* Likewise POOL_ENTRY insns. */ 7712 if (GET_CODE (x) == UNSPEC_VOLATILE 7713 && XINT (x, 1) == UNSPECV_POOL_ENTRY) 7714 return; 7715 7716 gcc_assert (GET_CODE (x) != SYMBOL_REF 7717 || !CONSTANT_POOL_ADDRESS_P (x)); 7718 7719 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF) 7720 { 7721 rtx sym = XVECEXP (x, 0, 0); 7722 gcc_assert (GET_CODE (sym) == SYMBOL_REF 7723 && CONSTANT_POOL_ADDRESS_P (sym)); 7724 7725 if (*ref == NULL_RTX) 7726 *ref = sym; 7727 else 7728 gcc_assert (*ref == sym); 7729 7730 return; 7731 } 7732 7733 fmt = GET_RTX_FORMAT (GET_CODE (x)); 7734 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 7735 { 7736 if (fmt[i] == 'e') 7737 { 7738 find_constant_pool_ref (XEXP (x, i), ref); 7739 } 7740 else if (fmt[i] == 'E') 7741 { 7742 for (j = 0; j < XVECLEN (x, i); j++) 7743 find_constant_pool_ref (XVECEXP (x, i, j), ref); 7744 } 7745 } 7746} 7747 7748/* Replace every reference to the annotated literal pool 7749 symbol REF in X by its base plus OFFSET. 
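   I.e. (unspec [REF BASE] UNSPEC_LTREF) becomes (plus BASE OFFSET),
   and a reference wrapped in an outer (plus ... (const_int D)) keeps
   its additional displacement D.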
*/ 7750 7751static void 7752replace_constant_pool_ref (rtx *x, rtx ref, rtx offset) 7753{ 7754 int i, j; 7755 const char *fmt; 7756 7757 gcc_assert (*x != ref); 7758 7759 if (GET_CODE (*x) == UNSPEC 7760 && XINT (*x, 1) == UNSPEC_LTREF 7761 && XVECEXP (*x, 0, 0) == ref) 7762 { 7763 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset); 7764 return; 7765 } 7766 7767 if (GET_CODE (*x) == PLUS 7768 && GET_CODE (XEXP (*x, 1)) == CONST_INT 7769 && GET_CODE (XEXP (*x, 0)) == UNSPEC 7770 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF 7771 && XVECEXP (XEXP (*x, 0), 0, 0) == ref) 7772 { 7773 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset); 7774 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1))); 7775 return; 7776 } 7777 7778 fmt = GET_RTX_FORMAT (GET_CODE (*x)); 7779 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) 7780 { 7781 if (fmt[i] == 'e') 7782 { 7783 replace_constant_pool_ref (&XEXP (*x, i), ref, offset); 7784 } 7785 else if (fmt[i] == 'E') 7786 { 7787 for (j = 0; j < XVECLEN (*x, i); j++) 7788 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset); 7789 } 7790 } 7791} 7792 7793/* Check whether X contains an UNSPEC_LTREL_BASE. 7794 Return its constant pool symbol if found, NULL_RTX otherwise. */ 7795 7796static rtx 7797find_ltrel_base (rtx x) 7798{ 7799 int i, j; 7800 const char *fmt; 7801 7802 if (GET_CODE (x) == UNSPEC 7803 && XINT (x, 1) == UNSPEC_LTREL_BASE) 7804 return XVECEXP (x, 0, 0); 7805 7806 fmt = GET_RTX_FORMAT (GET_CODE (x)); 7807 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 7808 { 7809 if (fmt[i] == 'e') 7810 { 7811 rtx fnd = find_ltrel_base (XEXP (x, i)); 7812 if (fnd) 7813 return fnd; 7814 } 7815 else if (fmt[i] == 'E') 7816 { 7817 for (j = 0; j < XVECLEN (x, i); j++) 7818 { 7819 rtx fnd = find_ltrel_base (XVECEXP (x, i, j)); 7820 if (fnd) 7821 return fnd; 7822 } 7823 } 7824 } 7825 7826 return NULL_RTX; 7827} 7828 7829/* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */ 7830 7831static void 7832replace_ltrel_base (rtx *x) 7833{ 7834 int i, j; 7835 const char *fmt; 7836 7837 if (GET_CODE (*x) == UNSPEC 7838 && XINT (*x, 1) == UNSPEC_LTREL_BASE) 7839 { 7840 *x = XVECEXP (*x, 0, 1); 7841 return; 7842 } 7843 7844 fmt = GET_RTX_FORMAT (GET_CODE (*x)); 7845 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) 7846 { 7847 if (fmt[i] == 'e') 7848 { 7849 replace_ltrel_base (&XEXP (*x, i)); 7850 } 7851 else if (fmt[i] == 'E') 7852 { 7853 for (j = 0; j < XVECLEN (*x, i); j++) 7854 replace_ltrel_base (&XVECEXP (*x, i, j)); 7855 } 7856 } 7857} 7858 7859 7860/* We keep a list of constants which we have to add to internal 7861 constant tables in the middle of large functions. 
/* We keep a list of constants which we have to add to internal
   constant tables in the middle of large functions.  */

#define NR_C_MODES 32
machine_mode constant_modes[NR_C_MODES] =
{
  TFmode, TImode, TDmode,
  V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
  V4SFmode, V2DFmode, V1TFmode,
  DFmode, DImode, DDmode,
  V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
  SFmode, SImode, SDmode,
  V4QImode, V2HImode, V1SImode, V1SFmode,
  HImode,
  V2QImode, V1HImode,
  QImode,
  V1QImode
};

struct constant
{
  struct constant *next;
  rtx value;
  rtx_code_label *label;
};

struct constant_pool
{
  struct constant_pool *next;
  rtx_insn *first_insn;
  rtx_insn *pool_insn;
  bitmap insns;
  rtx_insn *emit_pool_after;

  struct constant *constants[NR_C_MODES];
  struct constant *execute;
  rtx_code_label *label;
  int size;
};

/* Allocate new constant_pool structure.  */

static struct constant_pool *
s390_alloc_pool (void)
{
  struct constant_pool *pool;
  int i;

  pool = (struct constant_pool *) xmalloc (sizeof *pool);
  pool->next = NULL;
  for (i = 0; i < NR_C_MODES; i++)
    pool->constants[i] = NULL;

  pool->execute = NULL;
  pool->label = gen_label_rtx ();
  pool->first_insn = NULL;
  pool->pool_insn = NULL;
  pool->insns = BITMAP_ALLOC (NULL);
  pool->size = 0;
  pool->emit_pool_after = NULL;

  return pool;
}

/* Create new constant pool covering instructions starting at INSN
   and chain it to the end of POOL_LIST.  */

static struct constant_pool *
s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
{
  struct constant_pool *pool, **prev;

  pool = s390_alloc_pool ();
  pool->first_insn = insn;

  for (prev = pool_list; *prev; prev = &(*prev)->next)
    ;
  *prev = pool;

  return pool;
}

/* End range of instructions covered by POOL at INSN and emit
   placeholder insn representing the pool.  */

static void
s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
{
  rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);

  if (!insn)
    insn = get_last_insn ();

  pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
}

/* Add INSN to the list of insns covered by POOL.  */

static void
s390_add_pool_insn (struct constant_pool *pool, rtx insn)
{
  bitmap_set_bit (pool->insns, INSN_UID (insn));
}

/* Return pool out of POOL_LIST that covers INSN.  */

static struct constant_pool *
s390_find_pool (struct constant_pool *pool_list, rtx insn)
{
  struct constant_pool *pool;

  for (pool = pool_list; pool; pool = pool->next)
    if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
      break;

  return pool;
}
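/* Illustrative sketch (not part of the original sources) of how the
   chunkification code below drives these helpers:

     struct constant_pool *pool_list = NULL;
     struct constant_pool *pool = s390_start_pool (&pool_list, first_insn);
     s390_add_constant (pool, val, mode);   // collect literals ...
     s390_add_pool_insn (pool, insn);	    // ... and covered insns
     s390_end_pool (pool, barrier);	    // emit placeholder after barrier
     s390_dump_pool (pool, false);	    // materialize the entries
     s390_free_pool (pool);

   s390_find_pool maps an insn back to its covering chunk via the
   pool->insns bitmap.  */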
/* Add constant VAL of mode MODE to the constant pool POOL.  */

static void
s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
{
  struct constant *c;
  int i;

  for (i = 0; i < NR_C_MODES; i++)
    if (constant_modes[i] == mode)
      break;
  gcc_assert (i != NR_C_MODES);

  for (c = pool->constants[i]; c != NULL; c = c->next)
    if (rtx_equal_p (val, c->value))
      break;

  if (c == NULL)
    {
      c = (struct constant *) xmalloc (sizeof *c);
      c->value = val;
      c->label = gen_label_rtx ();
      c->next = pool->constants[i];
      pool->constants[i] = c;
      pool->size += GET_MODE_SIZE (mode);
    }
}

/* Return an rtx that represents the offset of X from the start of
   pool POOL.  */

static rtx
s390_pool_offset (struct constant_pool *pool, rtx x)
{
  rtx label;

  label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
  x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
		      UNSPEC_POOL_OFFSET);
  return gen_rtx_CONST (GET_MODE (x), x);
}

/* Find constant VAL of mode MODE in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the constant.  */

static rtx
s390_find_constant (struct constant_pool *pool, rtx val,
		    machine_mode mode)
{
  struct constant *c;
  int i;

  for (i = 0; i < NR_C_MODES; i++)
    if (constant_modes[i] == mode)
      break;
  gcc_assert (i != NR_C_MODES);

  for (c = pool->constants[i]; c != NULL; c = c->next)
    if (rtx_equal_p (val, c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}

/* Check whether INSN is an execute.  Return the label_ref to its
   execute target template if so, NULL_RTX otherwise.  */

static rtx
s390_execute_label (rtx insn)
{
  if (NONJUMP_INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL
      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
      && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
    return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);

  return NULL_RTX;
}

/* Add execute target for INSN to the constant pool POOL.  */

static void
s390_add_execute (struct constant_pool *pool, rtx insn)
{
  struct constant *c;

  for (c = pool->execute; c != NULL; c = c->next)
    if (INSN_UID (insn) == INSN_UID (c->value))
      break;

  if (c == NULL)
    {
      c = (struct constant *) xmalloc (sizeof *c);
      c->value = insn;
      c->label = gen_label_rtx ();
      c->next = pool->execute;
      pool->execute = c;
      pool->size += 6;
    }
}

/* Find execute target for INSN in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the execute target.  */

static rtx
s390_find_execute (struct constant_pool *pool, rtx insn)
{
  struct constant *c;

  for (c = pool->execute; c != NULL; c = c->next)
    if (INSN_UID (insn) == INSN_UID (c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
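/* Illustrative shape (reconstructed from the accessors above, not from
   the original sources) of an execute insn as recognized by
   s390_execute_label:

     (parallel
       [(unspec [...		     ; operands of the execute insn
		 (label_ref .Ltmpl)] ; vector element 2: template label
		UNSPEC_EXECUTE)
	...])			     ; PARALLEL elements 1..n: the target
				     ; template, extracted by
				     ; s390_execute_target below

   Each template occupies 6 bytes in the pool, hence the
   "pool->size += 6" above.  */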
/* For an execute INSN, extract the execute target template.  */

static rtx
s390_execute_target (rtx insn)
{
  rtx pattern = PATTERN (insn);
  gcc_assert (s390_execute_label (insn));

  if (XVECLEN (pattern, 0) == 2)
    {
      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
    }
  else
    {
      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
      int i;

      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));

      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
    }

  return pattern;
}

/* Indicate that INSN cannot be duplicated.  This is the case for
   execute insns that carry a unique label.  */

static bool
s390_cannot_copy_insn_p (rtx_insn *insn)
{
  rtx label = s390_execute_label (insn);
  return label && label != const0_rtx;
}

/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
   do not emit the pool base label.  */

static void
s390_dump_pool (struct constant_pool *pool, bool remote_label)
{
  struct constant *c;
  rtx_insn *insn = pool->pool_insn;
  int i;

  /* Switch to rodata section.  */
  if (TARGET_CPU_ZARCH)
    {
      insn = emit_insn_after (gen_pool_section_start (), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Ensure minimum pool alignment.  */
  if (TARGET_CPU_ZARCH)
    insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
  else
    insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Emit pool base label.  */
  if (!remote_label)
    {
      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Dump constants in descending alignment requirement order,
     ensuring proper alignment for every constant.  */
  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = c->next)
      {
	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
	rtx value = copy_rtx (c->value);
	if (GET_CODE (value) == CONST
	    && GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
	    && XVECLEN (XEXP (value, 0), 0) == 1)
	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));

	insn = emit_label_after (c->label, insn);
	INSN_ADDRESSES_NEW (insn, -1);

	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
					 gen_rtvec (1, value),
					 UNSPECV_POOL_ENTRY);
	insn = emit_insn_after (value, insn);
	INSN_ADDRESSES_NEW (insn, -1);
      }

  /* Ensure minimum alignment for instructions.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Output in-pool execute template insns.  */
  for (c = pool->execute; c; c = c->next)
    {
      insn = emit_label_after (c->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      insn = emit_insn_after (s390_execute_target (c->value), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Switch back to previous section.  */
  if (TARGET_CPU_ZARCH)
    {
      insn = emit_insn_after (gen_pool_section_end (), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  insn = emit_barrier_after (insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Remove placeholder insn.  */
  remove_insn (pool->pool_insn);
}
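/* Roughly, the assembly produced for a dumped pool looks like the
   following (illustrative sketch only; label names and the execute
   template are invented for the example):

	.align	8		; 4 on pre-z900 targets
   .LPOOL:			; pool base (omitted if REMOTE_LABEL)
   .LC0:   .quad ...		; constants, largest modes first, so each
   .LC1:   .long ...		; entry is naturally aligned
	.align	2
   .LEX0:  mvc 0(1,%r1),0(%r2)	; in-pool execute templates

   Emitting constants in descending size order is what makes the single
   leading alignment directive sufficient for all entries.  */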
/* Free all memory used by POOL.  */

static void
s390_free_pool (struct constant_pool *pool)
{
  struct constant *c, *next;
  int i;

  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = next)
      {
	next = c->next;
	free (c);
      }

  for (c = pool->execute; c; c = next)
    {
      next = c->next;
      free (c);
    }

  BITMAP_FREE (pool->insns);
  free (pool);
}


/* Collect main literal pool.  Return NULL on overflow.  */

static struct constant_pool *
s390_mainpool_start (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;

  pool = s390_alloc_pool ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
	{
	  /* There might be two main_pool instructions if base_reg
	     is call-clobbered; one for shrink-wrapped code and one
	     for the rest.  We want to keep the first.  */
	  if (pool->pool_insn)
	    {
	      insn = PREV_INSN (insn);
	      delete_insn (NEXT_INSN (insn));
	      continue;
	    }
	  pool->pool_insn = insn;
	}

      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
	{
	  s390_add_execute (pool, insn);
	}
      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);
	      s390_add_constant (pool, constant, mode);
	    }
	}

      /* If hot/cold partitioning is enabled we have to make sure that
	 the literal pool is emitted in the same section where the
	 initialization of the literal pool base pointer takes place.
	 emit_pool_after is only used in the non-overflow case on
	 non-Z CPUs, where we can emit the literal pool at the end of
	 the function body within the text section.  */
      if (NOTE_P (insn)
	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
	  && !pool->emit_pool_after)
	pool->emit_pool_after = PREV_INSN (insn);
    }

  gcc_assert (pool->pool_insn || pool->size == 0);

  if (pool->size >= 4096)
    {
      /* We're going to chunkify the pool, so remove the main
	 pool placeholder insn.  */
      remove_insn (pool->pool_insn);

      s390_free_pool (pool);
      pool = NULL;
    }

  /* If the function ends with the section where the literal pool
     should be emitted, set the marker to its end.  */
  if (pool && !pool->emit_pool_after)
    pool->emit_pool_after = get_last_insn ();

  return pool;
}
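/* Background note (illustrative, not from the original sources): the
   4096 overflow check above reflects the classic base + 12-bit
   unsigned displacement addressing mode, which reaches offsets
   0 .. 2^12 - 1 = 4095 from the pool base register.  A pool of 4096
   bytes or more can therefore not be fully addressed from a single
   base and must be split into chunks by s390_chunkify_start.  */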
/* POOL holds the main literal pool as collected by s390_mainpool_start.
   Modify the current function to output the pool constants as well as
   the pool register setup instruction.  */

static void
s390_mainpool_finish (struct constant_pool *pool)
{
  rtx base_reg = cfun->machine->base_reg;

  /* If the pool is empty, we're done.  */
  if (pool->size == 0)
    {
      /* We don't actually need a base register after all.  */
      cfun->machine->base_reg = NULL_RTX;

      if (pool->pool_insn)
	remove_insn (pool->pool_insn);
      s390_free_pool (pool);
      return;
    }

  /* We need correct insn addresses.  */
  shorten_branches (get_insns ());

  /* On zSeries, we use a LARL to load the pool register.  The pool is
     located in the .rodata section, so we emit it after the function.  */
  if (TARGET_CPU_ZARCH)
    {
      rtx set = gen_main_base_64 (base_reg, pool->label);
      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
      INSN_ADDRESSES_NEW (insn, -1);
      remove_insn (pool->pool_insn);

      insn = get_last_insn ();
      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
      INSN_ADDRESSES_NEW (pool->pool_insn, -1);

      s390_dump_pool (pool, 0);
    }

  /* On S/390, if the total size of the function's code plus literal pool
     does not exceed 4096 bytes, we use BASR to set up a function base
     pointer, and emit the literal pool at the end of the function.  */
  else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
	   + pool->size + 8 /* alignment slop */ < 4096)
    {
      rtx set = gen_main_base_31_small (base_reg, pool->label);
      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
      INSN_ADDRESSES_NEW (insn, -1);
      remove_insn (pool->pool_insn);

      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      /* emit_pool_after will be set by s390_mainpool_start to the
	 last insn of the section where the literal pool should be
	 emitted.  */
      insn = pool->emit_pool_after;

      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
      INSN_ADDRESSES_NEW (pool->pool_insn, -1);

      s390_dump_pool (pool, 1);
    }

  /* Otherwise, we emit an inline literal pool and use BASR to branch
     over it, setting up the pool register at the same time.  */
  else
    {
      rtx_code_label *pool_end = gen_label_rtx ();

      rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
      rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
      JUMP_LABEL (insn) = pool_end;
      INSN_ADDRESSES_NEW (insn, -1);
      remove_insn (pool->pool_insn);

      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
      INSN_ADDRESSES_NEW (pool->pool_insn, -1);

      insn = emit_label_after (pool_end, pool->pool_insn);
      INSN_ADDRESSES_NEW (insn, -1);

      s390_dump_pool (pool, 1);
    }


  /* Replace all literal pool references.  */

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	replace_ltrel_base (&PATTERN (insn));

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (pool, insn);
	      else
		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }


  /* Free the pool.  */
  s390_free_pool (pool);
}
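/* For orientation (illustrative sketch only; the exact mnemonics are
   assumptions, see the main_base_* patterns in s390.md), the three
   base setup strategies above emit roughly:

     z/Architecture:	 larl %r13,.LPOOL	; pool in .rodata
     S/390, small func:	 basr %r13,0		; pool past the code
     S/390, large func:	 bras %r13,.Lpool_end	; branch over inline pool
			 .LPOOL:  ...constants...
			 .Lpool_end:

   The small/large decision is the "code + pool + 8 < 4096" test above,
   again bounded by the 12-bit displacement reach of the base register.  */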
/* POOL holds the main literal pool as collected by s390_mainpool_start.
   We have decided we cannot use this pool, so revert all changes
   to the current function that were done by s390_mainpool_start.  */

static void
s390_mainpool_cancel (struct constant_pool *pool)
{
  /* We didn't actually change the instruction stream, so simply
     free the pool memory.  */
  s390_free_pool (pool);
}


/* Chunkify the literal pool.  */

#define S390_POOL_CHUNK_MIN 0xc00
#define S390_POOL_CHUNK_MAX 0xe00

static struct constant_pool *
s390_chunkify_start (void)
{
  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
  int extra_size = 0;
  bitmap far_labels;
  rtx pending_ltrel = NULL_RTX;
  rtx_insn *insn;

  rtx (*gen_reload_base) (rtx, rtx) =
    TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;


  /* We need correct insn addresses.  */

  shorten_branches (get_insns ());

  /* Scan all insns and move literals to pool chunks.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      bool section_switch_p = false;

      /* Check for pending LTREL_BASE.  */
      if (INSN_P (insn))
	{
	  rtx ltrel_base = find_ltrel_base (PATTERN (insn));
	  if (ltrel_base)
	    {
	      gcc_assert (ltrel_base == pending_ltrel);
	      pending_ltrel = NULL_RTX;
	    }
	}

      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
	{
	  if (!curr_pool)
	    curr_pool = s390_start_pool (&pool_list, insn);

	  s390_add_execute (curr_pool, insn);
	  s390_add_pool_insn (curr_pool, insn);
	}
      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);

	      if (!curr_pool)
		curr_pool = s390_start_pool (&pool_list, insn);

	      s390_add_constant (curr_pool, constant, mode);
	      s390_add_pool_insn (curr_pool, insn);

	      /* Don't split the pool chunk between a LTREL_OFFSET load
		 and the corresponding LTREL_BASE.  */
	      if (GET_CODE (constant) == CONST
		  && GET_CODE (XEXP (constant, 0)) == UNSPEC
		  && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
		{
		  gcc_assert (!pending_ltrel);
		  pending_ltrel = pool_ref;
		}
	    }
	}

      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
	{
	  if (curr_pool)
	    s390_add_pool_insn (curr_pool, insn);
	  /* An LTREL_BASE must follow within the same basic block.  */
	  gcc_assert (!pending_ltrel);
	}

      if (NOTE_P (insn))
	switch (NOTE_KIND (insn))
	  {
	  case NOTE_INSN_SWITCH_TEXT_SECTIONS:
	    section_switch_p = true;
	    break;
	  case NOTE_INSN_VAR_LOCATION:
	  case NOTE_INSN_CALL_ARG_LOCATION:
	    continue;
	  default:
	    break;
	  }

      if (!curr_pool
	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
	continue;

      if (TARGET_CPU_ZARCH)
	{
	  if (curr_pool->size < S390_POOL_CHUNK_MAX)
	    continue;

	  s390_end_pool (curr_pool, NULL);
	  curr_pool = NULL;
	}
      else
	{
	  int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
			   - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
			   + extra_size;

	  /* We will later have to insert base register reload insns.
	     Those will have an effect on code size, which we need to
	     consider here.  This calculation makes rather pessimistic
	     worst-case assumptions.  */
	  if (LABEL_P (insn))
	    extra_size += 6;

	  if (chunk_size < S390_POOL_CHUNK_MIN
	      && curr_pool->size < S390_POOL_CHUNK_MIN
	      && !section_switch_p)
	    continue;

	  /* Pool chunks can only be inserted after BARRIERs ...  */
	  if (BARRIER_P (insn))
	    {
	      s390_end_pool (curr_pool, insn);
	      curr_pool = NULL;
	      extra_size = 0;
	    }

	  /* ... so if we don't find one in time, create one.  */
	  else if (chunk_size > S390_POOL_CHUNK_MAX
		   || curr_pool->size > S390_POOL_CHUNK_MAX
		   || section_switch_p)
	    {
	      rtx_insn *label, *jump, *barrier, *next, *prev;

	      if (!section_switch_p)
		{
		  /* We can insert the barrier only after a 'real' insn.  */
		  if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
		    continue;
		  if (get_attr_length (insn) == 0)
		    continue;
		  /* Don't separate LTREL_BASE from the corresponding
		     LTREL_OFFSET load.  */
		  if (pending_ltrel)
		    continue;
		  next = insn;
		  do
		    {
		      insn = next;
		      next = NEXT_INSN (insn);
		    }
		  while (next
			 && NOTE_P (next)
			 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
			     || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
		}
	      else
		{
		  gcc_assert (!pending_ltrel);

		  /* The old pool has to end before the section switch
		     note in order to make it part of the current
		     section.  */
		  insn = PREV_INSN (insn);
		}

	      label = gen_label_rtx ();
	      prev = insn;
	      if (prev && NOTE_P (prev))
		prev = prev_nonnote_insn (prev);
	      if (prev)
		jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
						    INSN_LOCATION (prev));
	      else
		jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
	      barrier = emit_barrier_after (jump);
	      insn = emit_label_after (label, barrier);
	      JUMP_LABEL (jump) = label;
	      LABEL_NUSES (label) = 1;

	      INSN_ADDRESSES_NEW (jump, -1);
	      INSN_ADDRESSES_NEW (barrier, -1);
	      INSN_ADDRESSES_NEW (insn, -1);

	      s390_end_pool (curr_pool, barrier);
	      curr_pool = NULL;
	      extra_size = 0;
	    }
	}
    }

  if (curr_pool)
    s390_end_pool (curr_pool, NULL);
  gcc_assert (!pending_ltrel);

  /* Find all labels that are branched into
     from an insn belonging to a different chunk.  */

  far_labels = BITMAP_ALLOC (NULL);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx_jump_table_data *table;

      /* Labels marked with LABEL_PRESERVE_P can be target
	 of non-local jumps, so we have to mark them.
	 The same holds for named labels.

	 Don't do that, however, if it is the label before
	 a jump table.  */

      if (LABEL_P (insn)
	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
	{
	  rtx_insn *vec_insn = NEXT_INSN (insn);
	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
	}
      /* Check potential targets in a table jump (casesi_jump).  */
      else if (tablejump_p (insn, NULL, &table))
	{
	  rtx vec_pat = PATTERN (table);
	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;

	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
	    {
	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);

	      if (s390_find_pool (pool_list, label)
		  != s390_find_pool (pool_list, insn))
		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
	    }
	}
      /* If we have a direct jump (conditional or unconditional),
	 check all potential targets.  */
      else if (JUMP_P (insn))
	{
	  rtx pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) == SET)
	    {
	      rtx label = JUMP_LABEL (insn);
	      if (label && !ANY_RETURN_P (label))
		{
		  if (s390_find_pool (pool_list, label)
		      != s390_find_pool (pool_list, insn))
		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
		}
	    }
	}
    }

  /* Insert base register reload insns before every pool.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    {
      rtx new_insn = gen_reload_base (cfun->machine->base_reg,
				      curr_pool->label);
      rtx_insn *insn = curr_pool->first_insn;
      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
    }

  /* Insert base register reload insns at every far label.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (LABEL_P (insn)
	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
      {
	struct constant_pool *pool = s390_find_pool (pool_list, insn);
	if (pool)
	  {
	    rtx new_insn = gen_reload_base (cfun->machine->base_reg,
					    pool->label);
	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
	  }
      }


  BITMAP_FREE (far_labels);


  /* Recompute insn addresses.  */

  init_insn_lengths ();
  shorten_branches (get_insns ());

  return pool_list;
}
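/* Sizing note (illustrative, not from the original sources):
   S390_POOL_CHUNK_MIN is 0xc00 = 3072 bytes and S390_POOL_CHUNK_MAX is
   0xe00 = 3584 bytes.  Closing chunks inside this window leaves
   4096 - 3584 = 512 bytes of headroom below the 12-bit displacement
   limit for the alignment slop and for the pessimistically counted
   base reload insns tracked in extra_size above.  */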
/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
   After we have decided to use this list, finish implementing
   all changes to the current function as required.  */

static void
s390_chunkify_finish (struct constant_pool *pool_list)
{
  struct constant_pool *curr_pool = NULL;
  rtx_insn *insn;


  /* Replace all literal pool references.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	replace_ltrel_base (&PATTERN (insn));

      curr_pool = s390_find_pool (pool_list, insn);
      if (!curr_pool)
	continue;

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (curr_pool, insn);
	      else
		addr = s390_find_constant (curr_pool,
					   get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Dump out all literal pools.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    s390_dump_pool (curr_pool, 0);

  /* Free pool list.  */

  while (pool_list)
    {
      struct constant_pool *next = pool_list->next;
      s390_free_pool (pool_list);
      pool_list = next;
    }
}
/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
   We have decided we cannot use this list, so revert all changes
   to the current function that were done by s390_chunkify_start.  */

static void
s390_chunkify_cancel (struct constant_pool *pool_list)
{
  struct constant_pool *curr_pool = NULL;
  rtx_insn *insn;

  /* Remove all pool placeholder insns.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    {
      /* Did we insert an extra barrier?  Remove it.  */
      rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
      rtx_insn *jump = barrier ? PREV_INSN (barrier) : NULL;
      rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);

      if (jump && JUMP_P (jump)
	  && barrier && BARRIER_P (barrier)
	  && label && LABEL_P (label)
	  && GET_CODE (PATTERN (jump)) == SET
	  && SET_DEST (PATTERN (jump)) == pc_rtx
	  && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
	  && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
	{
	  remove_insn (jump);
	  remove_insn (barrier);
	  remove_insn (label);
	}

      remove_insn (curr_pool->pool_insn);
    }

  /* Remove all base register reload insns.  */

  for (insn = get_insns (); insn; )
    {
      rtx_insn *next_insn = NEXT_INSN (insn);

      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
	remove_insn (insn);

      insn = next_insn;
    }

  /* Free pool list.  */

  while (pool_list)
    {
      struct constant_pool *next = pool_list->next;
      s390_free_pool (pool_list);
      pool_list = next;
    }
}

/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */

void
s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
{
  REAL_VALUE_TYPE r;

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);

      REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
      assemble_real (r, mode, align);
      break;

    case MODE_INT:
      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
      mark_symbol_refs_as_used (exp);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      {
	int i;
	machine_mode inner_mode;
	gcc_assert (GET_CODE (exp) == CONST_VECTOR);

	inner_mode = GET_MODE_INNER (GET_MODE (exp));
	for (i = 0; i < XVECLEN (exp, 0); i++)
	  s390_output_pool_entry (XVECEXP (exp, 0, i),
				  inner_mode,
				  i == 0
				  ? align
				  : GET_MODE_BITSIZE (inner_mode));
      }
      break;

    default:
      gcc_unreachable ();
    }
}
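/* Worked example (illustrative): a V2DImode CONST_VECTOR is emitted by
   the recursion above as two DImode entries, e.g.

     s390_output_pool_entry (exp, V2DImode, 64);
       -> assemble_integer (elt0, 8, 64, 1);  // first element keeps ALIGN
       -> assemble_integer (elt1, 8, 64, 1);  // the rest use the inner
					      // mode's bit size as alignment

   i.e. roughly ".quad elt0 / .quad elt1" in the assembly output.  */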
/* Return an RTL expression representing the value of the return address
   for the frame COUNT steps up from the current frame.  FRAME is the
   frame pointer of that frame.  */

rtx
s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
{
  int offset;
  rtx addr;

  /* Without backchain, we fail for all but the current frame.  */

  if (!TARGET_BACKCHAIN && count > 0)
    return NULL_RTX;

  /* For the current frame, we need to make sure the initial
     value of RETURN_REGNUM is actually saved.  */

  if (count == 0)
    {
      /* On non-z architectures branch splitting could overwrite r14.  */
      if (TARGET_CPU_ZARCH)
	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
      else
	{
	  cfun_frame_layout.save_return_addr_p = true;
	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
	}
    }

  if (TARGET_PACKED_STACK)
    offset = -2 * UNITS_PER_LONG;
  else
    offset = RETURN_REGNUM * UNITS_PER_LONG;

  addr = plus_constant (Pmode, frame, offset);
  addr = memory_address (Pmode, addr);
  return gen_rtx_MEM (Pmode, addr);
}

/* Return an RTL expression representing the back chain stored in
   the current stack frame.  */

rtx
s390_back_chain_rtx (void)
{
  rtx chain;

  gcc_assert (TARGET_BACKCHAIN);

  if (TARGET_PACKED_STACK)
    chain = plus_constant (Pmode, stack_pointer_rtx,
			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
  else
    chain = stack_pointer_rtx;

  chain = gen_rtx_MEM (Pmode, chain);
  return chain;
}

/* Find first call-clobbered register unused in a function.
   This could be used as base register in a leaf function
   or for holding the return address before epilogue.  */

static int
find_unused_clobbered_reg (void)
{
  int i;
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return 0;
}


/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA
   for all clobbered hard regs in SETREG.  */

static void
s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED,
			void *data)
{
  char *regs_ever_clobbered = (char *) data;
  unsigned int i, regno;
  machine_mode mode = GET_MODE (setreg);

  if (GET_CODE (setreg) == SUBREG)
    {
      rtx inner = SUBREG_REG (setreg);
      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
	return;
      regno = subreg_regno (setreg);
    }
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  for (i = regno;
       i < regno + HARD_REGNO_NREGS (regno, mode);
       i++)
    regs_ever_clobbered[i] = 1;
}
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed char array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */

static void
s390_regs_ever_clobbered (char regs_ever_clobbered[])
{
  basic_block cur_bb;
  rtx_insn *cur_insn;
  unsigned int i;

  memset (regs_ever_clobbered, 0, 32);

  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    {
      for (i = 0; i < 32; i++)
	regs_ever_clobbered[i] = call_really_used_regs[i];
    }

  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
     this work is done by liveness analysis (mark_regs_live_at_end).
     Special care is needed for functions containing landing pads.  Landing pads
     may use the eh registers, but the code which sets these registers is not
     contained in that function.  Hence s390_regs_ever_clobbered is not able to
     deal with this automatically.  */
  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
      if (crtl->calls_eh_return
	  || (cfun->machine->has_landing_pad_p
	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;

  /* For nonlocal gotos all call-saved registers have to be saved.
     This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
  if (crtl->saves_all_registers)
    for (i = 0; i < 32; i++)
      if (!call_really_used_regs[i])
	regs_ever_clobbered[i] = 1;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      FOR_BB_INSNS (cur_bb, cur_insn)
	{
	  rtx pat;

	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;

		  /* l / lg  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}

	      /* lm / lmg  */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (pat,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
	}
    }
}

/* Determine the frame area which actually has to be accessed
   in the function epilogue.  The values are stored at the
   given pointers AREA_BOTTOM (address of the lowest used stack
   address) and AREA_TOP (address of the first item which does
   not belong to the stack frame).  */

static void
s390_frame_area (int *area_bottom, int *area_top)
{
  int b, t;

  b = INT_MAX;
  t = INT_MIN;

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      b = (cfun_frame_layout.gprs_offset
	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
      t = b + (cfun_frame_layout.last_restore_gpr
	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
    }

  if (TARGET_64BIT && cfun_save_high_fprs_p)
    {
      b = MIN (b, cfun_frame_layout.f8_offset);
      t = MAX (t, (cfun_frame_layout.f8_offset
		   + cfun_frame_layout.high_fprs * 8));
    }

  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }
  *area_bottom = b;
  *area_top = t;
}
/* Update gpr_save_slots in the frame layout trying to make use of
   FPRs as GPR save slots.
   This is a helper routine of s390_register_info.  */

static void
s390_register_info_gprtofpr ()
{
  int save_reg_slot = FPR0_REGNUM;
  int i, j;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 15; i >= 6; i--)
    {
      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_really_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
	  break;
	}
      cfun_gpr_save_slot (i) = save_reg_slot++;
    }
}

/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_fpr ()
{
  int i;
  int min_fpr;
  int max_fpr;

  /* Save the FP argument regs for stdarg.  f0, f2 for 31 bit and
     f0-f4 for 64 bit.  */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;

  min_fpr = crtl->args.info.fprs;
  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
  if (max_fpr >= FP_ARG_NUM_REG)
    max_fpr = FP_ARG_NUM_REG - 1;

  /* FPR argument regs start at f0.  */
  min_fpr += FPR0_REGNUM;
  max_fpr += FPR0_REGNUM;

  for (i = min_fpr; i <= max_fpr; i++)
    cfun_set_fpr_save (i);
}

/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}
/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as saved to the stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}

/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_really_used_regs[i])
      {
	cfun_set_fpr_save (i);
	if (i >= FPR8_REGNUM)
	  cfun_frame_layout.high_fprs++;
      }

  if (flag_pic)
    clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
      |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun->machine->split_branches_pending_p
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}
/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);
  gcc_assert (!cfun->machine->split_branches_pending_p);

  s390_regs_ever_clobbered (clobbered_regs);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
	/* When expanding builtin_return_addr in ESA mode we do not
	   know whether r14 will later be needed as scratch reg when
	   doing branch splitting.  So the builtin always accesses the
	   r14 save slot and we need to stick to the save/restore
	   decision for r14 even if it turns out that it didn't get
	   clobbered.  */
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}

/* Fill cfun->machine with info about frame of current function.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
		 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float.  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
	 save slot must not change.  So we always need a slot for the
	 stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
	 keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
	cfun_frame_layout.f4_offset =
	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR4_REGNUM) +
		     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR0_REGNUM)
		     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
				     - cfun_frame_layout.high_fprs * 8);
    }

  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
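/* Worked example (illustrative): with STACK_BOUNDARY 64 the rounding
   above is (size + 7) & ~7, so e.g.

     frame_size = 20  ->  (20 + 7) & ~7 = 27 & ~7 = 24

   i.e. the frame grows to the next multiple of 8 bytes, which is what
   keeps 31-bit frames 8-byte aligned when an odd number of 4-byte GPR
   slots is in use.  */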
/* Generate frame layout.  Fills in register and frame data for the current
   function in cfun->machine.  This routine can be called multiple times;
   it will re-do the complete frame layout every time.  */

static void
s390_init_frame_layout (void)
{
  HOST_WIDE_INT frame_size;
  int base_used;

  gcc_assert (!reload_completed);

  /* On S/390 machines, we may need to perform branch splitting, which
     will require both base and return address register.  We have no
     choice but to assume we're going to need them until right at the
     end of the machine dependent reorg phase.  */
  if (!TARGET_CPU_ZARCH)
    cfun->machine->split_branches_pending_p = true;

  do
    {
      frame_size = cfun_frame_layout.frame_size;

      /* Try to predict whether we'll need the base register.  */
      base_used = cfun->machine->split_branches_pending_p
		  || crtl->uses_const_pool
		  || (!DISP_IN_RANGE (frame_size)
		      && !CONST_OK_FOR_K (frame_size));

      /* Decide which register to use as literal pool base.  In small
	 leaf functions, try to use an unused call-clobbered register
	 as base register to avoid save/restore overhead.  */
      if (!base_used)
	cfun->machine->base_reg = NULL_RTX;
      else if (crtl->is_leaf && !df_regs_ever_live_p (5))
	cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
      else
	cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
}
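/* Convergence note (illustrative): the do-while above iterates to a
   fixed point because the choice of base register feeds back into the
   frame size.  E.g. a function whose frame is just below the
   displacement limit may, after deciding it needs a base register,
   gain an extra GPR save slot, grow its frame, and thus require a
   second pass with the new size before the layout stabilizes.  */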
/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without a tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - when CC is copied to a GPR and the CC0 check is done with the GPR
     this is not supported  */

static void
s390_optimize_nonescaping_tx (void)
{
  const unsigned int CC0 = 1 << 3;
  basic_block tbegin_bb = NULL;
  basic_block tend_bb = NULL;
  basic_block bb;
  rtx_insn *insn;
  bool result = true;
  int bb_index;
  rtx_insn *tbegin_insn = NULL;

  if (!cfun->machine->tbegin_p)
    return;

  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

      if (!bb)
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx ite, cc, pat, target;
	  unsigned HOST_WIDE_INT mask;

	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) != SET
	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
	    continue;

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
	    {
	      rtx_insn *tmp;

	      tbegin_insn = insn;

	      /* Just return if the tbegin doesn't have clobbers.  */
	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
		return;

	      if (tbegin_bb != NULL)
		return;

	      /* Find the next conditional jump.  */
	      for (tmp = NEXT_INSN (insn);
		   tmp != NULL_RTX;
		   tmp = NEXT_INSN (tmp))
		{
		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
		    return;
		  if (!JUMP_P (tmp))
		    continue;

		  ite = SET_SRC (PATTERN (tmp));
		  if (GET_CODE (ite) != IF_THEN_ELSE)
		    continue;

		  cc = XEXP (XEXP (ite, 0), 0);
		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
		      || GET_MODE (cc) != CCRAWmode
		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
		    return;

		  if (bb->succs->length () != 2)
		    return;

		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
		  if (GET_CODE (XEXP (ite, 0)) == NE)
		    mask ^= 0xf;

		  if (mask == CC0)
		    target = XEXP (ite, 1);
		  else if (mask == (CC0 ^ 0xf))
		    target = XEXP (ite, 2);
		  else
		    return;

		  {
		    edge_iterator ei;
		    edge e1, e2;

		    ei = ei_start (bb->succs);
		    e1 = ei_safe_edge (ei);
		    ei_next (&ei);
		    e2 = ei_safe_edge (ei);

		    if (e2->flags & EDGE_FALLTHRU)
		      {
			e2 = e1;
			e1 = ei_safe_edge (ei);
		      }

		    if (!(e1->flags & EDGE_FALLTHRU))
		      return;

		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
		  }
		  if (tmp == BB_END (bb))
		    break;
		}
	    }

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
	    {
	      if (tend_bb != NULL)
		return;
	      tend_bb = bb;
	    }
	}
    }

  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;

  if (tbegin_bb == NULL || tend_bb == NULL)
    return;

  calculate_dominance_info (CDI_POST_DOMINATORS);
  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (!result)
    return;

  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (2,
				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
  INSN_CODE (tbegin_insn) = -1;
  df_insn_rescan (tbegin_insn);

  return;
}
/* Return true if it is legal to put a value with MODE into REGNO.  */

bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
    return false;

  switch (REGNO_REG_CLASS (regno))
    {
    case VEC_REGS:
      return ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode));
      break;
    case FP_REGS:
      if (TARGET_VX
	  && ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (FP_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode)))
	return true;

      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == DImode)
	    return true;

	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	    return true;
	}
      break;
    case ADDR_REGS:
      if (FRAME_REGNO_P (regno) && mode == Pmode)
	return true;

      /* fallthrough */
    case GENERAL_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (TARGET_ZARCH
	      || (mode != TFmode && mode != TCmode && mode != TDmode))
	    return true;
	}
      break;
    case CC_REGS:
      if (GET_MODE_CLASS (mode) == MODE_CC)
	return true;
      break;
    case ACCESS_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == Pmode)
	    return true;
	}
      break;
    default:
      return false;
    }

  return false;
}

/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

bool
s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
{
  /* Once we've decided upon a register to use as base register, it must
     no longer be used for any other purpose.  */
  if (cfun->machine->base_reg)
    if (REGNO (cfun->machine->base_reg) == old_reg
	|| REGNO (cfun->machine->base_reg) == new_reg)
      return false;

  /* Prevent regrename from using call-saved regs which haven't
     actually been saved.  This is necessary since regrename assumes
     the backend save/restore decisions are based on
     df_regs_ever_live.  Since we have our own routine we have to tell
     regrename manually about it.  */
  if (GENERAL_REGNO_P (new_reg)
      && !call_really_used_regs[new_reg]
      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
s390_hard_regno_scratch_ok (unsigned int regno)
{
  /* See s390_hard_regno_rename_ok.  */
  if (GENERAL_REGNO_P (regno)
      && !call_really_used_regs[regno]
      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
    return false;

  return true;
}

/* Return TRUE if changing mode from FROM to TO should not be allowed
   for register class CLASS.  */

int
s390_cannot_change_mode_class (machine_mode from_mode,
			       machine_mode to_mode,
			       enum reg_class rclass)
{
  machine_mode small_mode;
  machine_mode big_mode;

  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
    return 0;

  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
    {
      small_mode = from_mode;
      big_mode = to_mode;
    }
  else
    {
      small_mode = to_mode;
      big_mode = from_mode;
    }

  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in a VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also if the vector
     facility is available we still place TFmode values in VR register
     pairs, since the only instructions we have operating on TFmode
     deal with register pairs.  Therefore we have to allow DFmode
     subregs of TFmode to enable the TFmode splitters.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && (GET_MODE_SIZE (small_mode) < 8
	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
    return 1;

  /* Likewise for access registers, since they have only half the
     word size on 64-bit.  */
  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
    return 1;

  return 0;
}

/* Return true if we use LRA instead of the reload pass.  */

static bool
s390_lra_p (void)
{
  return s390_lra_flag;
}

/* Return true if register FROM can be eliminated via register TO.  */

static bool
s390_can_eliminate (const int from, const int to)
{
  /* On zSeries machines, we have not marked the base register as fixed.
     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
     If a function requires the base register, we say here that this
     elimination cannot be performed.  This will cause reload to free
     up the base register (as if it were fixed).  On the other hand,
     if the current function does *not* require the base register, we
     say here the elimination succeeds, which in turn allows reload
     to allocate the base register for any other purpose.  */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      if (TARGET_CPU_ZARCH)
	{
	  s390_init_frame_layout ();
	  return cfun->machine->base_reg == NULL_RTX;
	}

      return false;
    }

  /* Everything else must point into the stack frame.  */
  gcc_assert (to == STACK_POINTER_REGNUM
	      || to == HARD_FRAME_POINTER_REGNUM);

  gcc_assert (from == FRAME_POINTER_REGNUM
	      || from == ARG_POINTER_REGNUM
	      || from == RETURN_ADDRESS_POINTER_REGNUM);

  /* Make sure we actually saved the return address.  */
  if (from == RETURN_ADDRESS_POINTER_REGNUM)
    if (!crtl->calls_eh_return
	&& !cfun->stdarg
	&& !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
}

/* Return the offset between registers FROM and TO initially after the
   prologue.  */

HOST_WIDE_INT
s390_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  /* ??? Why are we called for non-eliminable pairs?  */
  if (!s390_can_eliminate (from, to))
    return 0;

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = (get_frame_size ()
		+ STACK_POINTER_OFFSET
		+ crtl->outgoing_args_size);
      break;

    case ARG_POINTER_REGNUM:
      s390_init_frame_layout ();
      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      s390_init_frame_layout ();

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	{
	  /* If it turns out that for stdarg nothing went into the reg
	     save area, we also do not need the return address
	     pointer.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    return 0;

	  gcc_unreachable ();
	}

      /* In order to make the following work it is not necessary for
	 r14 to have a save slot.  It is sufficient if one other GPR
	 got one.  Since the GPRs are always stored without gaps we
	 are able to calculate where the r14 save slot would
	 reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset
		+ (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot)
		* UNITS_PER_LONG);
      break;

    case BASE_REGNUM:
      offset = 0;
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
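
/* An illustrative example for the r14 computation above (all numbers
   assumed): with frame_size 160, gprs_offset 48, first_save_gpr_slot 6
   and RETURN_REGNUM 14 on 64 bit (UNITS_PER_LONG 8), the r14 save slot
   is at 160 + 48 + (14 - 6) * 8 = 272.  */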

/* Emit insn to save fpr REGNUM at offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
save_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));

  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
    set_mem_alias_set (addr, get_varargs_alias_set ());
  else
    set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
}

/* Emit insn to restore fpr REGNUM from offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
restore_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
  set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}

/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored anyway.  */

static inline bool
global_not_special_regno_p (int regno)
{
  return (global_regs[regno]
	  /* These registers are special and need to be
	     restored in any case.  */
	  && !(regno == STACK_POINTER_REGNUM
	       || regno == RETURN_REGNUM
	       || regno == BASE_REGNUM
	       || (flag_pic && regno == (int) PIC_OFFSET_TABLE_REGNUM)));
}

/* Generate insn to save registers FIRST to LAST into
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
save_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn, note;
  int i;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);

  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
      else
	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));

      if (!global_not_special_regno_p (first))
	RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }


  insn = gen_store_multiple (addr,
			     gen_rtx_REG (Pmode, first),
			     GEN_INT (last - first + 1));

  if (first <= 6 && cfun->stdarg)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

	if (first + i <= 6)
	  set_mem_alias_set (mem, get_varargs_alias_set ());
      }

  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not set the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as a REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
	if (!global_not_special_regno_p (start))
	  break;

      if (start > last)
	return insn;

      addr = plus_constant (Pmode, base,
			    offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
	{
	  if (TARGET_64BIT)
	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  else
	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  note = PATTERN (note);

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  return insn;
	}

      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
				 gen_rtx_REG (Pmode, start),
				 GEN_INT (last - start + 1));
      note = PATTERN (note);

      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

      for (i = 0; i < XVECLEN (note, 0); i++)
	if (GET_CODE (XVECEXP (note, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return insn;
}
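
/* A sketch of the effect of the REG_FRAME_RELATED_EXPR handling above
   (register numbers assumed): for a store multiple of r2..r15 emitted
   in a stdarg function, the insn itself stores r2..r15, but the
   attached note only describes the stores of r6..r15, so dwarf2cfi
   never records the vararg slots of r2..r5 as register saves.  */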

/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
			    addr,
			    GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}

/* Return insn sequence to load the GOT register.  */

static GTY(()) rtx got_symbol;

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called, and in
     that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
     is usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  start_sequence ();

  if (TARGET_CPU_ZARCH)
    {
      emit_move_insn (got_rtx, got_symbol);
    }
  else
    {
      rtx offset;

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
			       UNSPEC_LTREL_OFFSET);
      offset = gen_rtx_CONST (Pmode, offset);
      offset = force_const_mem (Pmode, offset);

      emit_move_insn (got_rtx, offset);

      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
			       UNSPEC_LTREL_BASE);
      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);

      emit_move_insn (got_rtx, offset);
    }

  insns = get_insns ();
  end_sequence ();
  return insns;
}

/* This ties together the stack memory (MEM with an alias set of
   frame_alias_set) and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));

  emit_insn (gen_stack_tie (mem));
}

/* Copy GPRs into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
	{
	  rtx_insn *insn
	    = emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
			      gen_rtx_REG (DImode, i));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  Doing
	     so, it might emit def_cfa_register notes setting an FPR as
	     the new CFA.  */
	  add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
	}
    }
}

/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
	continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
      if (i == STACK_POINTER_REGNUM)
	add_reg_note (insn, REG_CFA_DEF_CFA,
		      plus_constant (Pmode, stack_pointer_rtx,
				     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
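
/* Illustration of the two helpers above (an assumed scenario): in a
   leaf function on z10 with hard float, r14 may be given an FPR save
   slot, so the prologue moves r14 into the FPR (an ldgr-style
   GPR-to-FPR move) and the epilogue moves it back, avoiding stack
   accesses entirely; the REG_CFA_REGISTER note keeps dwarf2cfi from
   making the FPR the new CFA.  */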

/* A pass run immediately before shrink-wrapping and prologue and
   epilogue generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	annotate_constant_pool_refs (&PATTERN (insn));
	df_insn_rescan (insn);
      }
  return 0;
}

} // anon namespace

/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     See below for why TPF must use register 1.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  s390_save_gprs_to_fprs ();

  /* Save call-saved GPRs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
			cfun_frame_layout.gprs_offset
			+ UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
					    - cfun_frame_layout.first_save_gpr_slot),
			cfun_frame_layout.first_save_gpr,
			cfun_frame_layout.last_save_gpr);
      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
	{
	  save_fpr (stack_pointer_rtx, offset, i);
	  offset += 8;
	}
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
	offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
	{
	  insn = save_fpr (stack_pointer_rtx, offset, i);
	  offset += 8;

	  /* If f4 and f6 are call clobbered they are saved due to
	     stdarg and therefore are not frame related.  */
	  if (!call_really_used_regs[i])
	    RTX_FRAME_RELATED_P (insn) = 1;
	}
      else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
	offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
		+ (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
	if (cfun_fpr_save_p (i))
	  {
	    insn = save_fpr (stack_pointer_rtx, offset, i);

	    RTX_FRAME_RELATED_P (insn) = 1;
	    offset -= 8;
	  }
      if (offset >= cfun_frame_layout.f8_offset)
	next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx real_frame_off;

      if (s390_stack_size)
	{
	  HOST_WIDE_INT stack_guard;

	  if (s390_stack_guard)
	    stack_guard = s390_stack_guard;
	  else
	    {
	      /* If no stack guard value is provided, the smallest power
		 of 2 larger than the current frame size is chosen.  */
	      stack_guard = 1;
	      while (stack_guard < cfun_frame_layout.frame_size)
		stack_guard <<= 1;
	    }

	  if (cfun_frame_layout.frame_size >= s390_stack_size)
	    {
	      warning (0, "frame size of function %qs is %wd"
		       " bytes exceeding user-provided stack limit of "
		       "%d bytes.  An unconditional trap is added.",
		       current_function_name (), cfun_frame_layout.frame_size,
		       s390_stack_size);
	      emit_insn (gen_trap ());
	    }
	  else
	    {
	      /* stack_guard has to be smaller than s390_stack_size.
		 Otherwise we would emit an AND with zero, which would
		 not match the test-under-mask pattern.  */
	      if (stack_guard >= s390_stack_size)
		{
		  warning (0, "frame size of function %qs is %wd"
			   " bytes which is more than half the stack size.  "
			   "The dynamic check would not be reliable.  "
			   "No check emitted for this function.",
			   current_function_name (),
			   cfun_frame_layout.frame_size);
		}
	      else
		{
		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
						    & ~(stack_guard - 1));

		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
				       GEN_INT (stack_check_mask));
		  if (TARGET_64BIT)
		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
							 t, const0_rtx),
					     t, const0_rtx, const0_rtx));
		  else
		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
							 t, const0_rtx),
					     t, const0_rtx, const0_rtx));
		}
	    }
	}
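
      /* A worked example for the mask above (option values assumed):
	 with -mstack-size=65536 and -mstack-guard=4096 we get
	 (65536 - 1) & ~(4096 - 1) = 0xf000, so the conditional trap
	 fires once the stack pointer falls into the lowest
	 guard-sized chunk of the stack area, i.e. when
	 sp & 0xf000 == 0.  */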

      if (s390_warn_framesize > 0
	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
	warning (0, "frame size of %qs is %wd bytes",
		 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
	warning (0, "%qs uses dynamic stack allocation",
		 current_function_name ());

      /* Save incoming stack pointer into temp reg.  */
      if (TARGET_BACKCHAIN || next_fpr)
	insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));

      /* Subtract frame size from stack pointer.  */

      if (DISP_IN_RANGE (INTVAL (frame_off)))
	{
	  insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    frame_off));
	  insn = emit_insn (insn);
	}
      else
	{
	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
	    frame_off = force_const_mem (Pmode, frame_off);

	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
	  annotate_constant_pool_refs (&PATTERN (insn));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
      real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					       real_frame_off)));

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
	{
	  if (cfun_frame_layout.backchain_offset)
	    addr = gen_rtx_MEM (Pmode,
				plus_constant (Pmode, stack_pointer_rtx,
					       cfun_frame_layout.backchain_offset));
	  else
	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_move_insn (addr, temp_reg));
	}

      /* If we support non-call exceptions (e.g. for Java),
	 we need to make sure the backchain pointer is set up
	 before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
	{
	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  emit_clobber (addr);
	}
    }

  /* Save fprs 8 - 15 (64-bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
	 we have to make sure that the stack pointer decrement is not
	 moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
				       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
	if (cfun_fpr_save_p (i))
	  {
	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
				      cfun_frame_layout.frame_size
				      + cfun_frame_layout.f8_offset
				      + offset);

	    insn = save_fpr (temp_reg, offset, i);
	    offset += 8;
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			  gen_rtx_SET (VOIDmode,
				       gen_rtx_MEM (DFmode, addr),
				       gen_rtx_REG (DFmode, i)));
	  }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up GOT pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
	annotate_constant_pool_refs (&PATTERN (insn));

      emit_insn (insns);
    }

  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function
	 entry intercept to facilitate the use of tracing
	 algorithms located at the branch target.  */
      emit_insn (gen_prologue_tpf ());

      /* Emit a blockage here so that all code
	 lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
}
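
/* For orientation (illustrative, not verbatim compiler output), a
   typical 64-bit prologue built by the code above looks like

       stmg  %r14,%r15,112(%r15)   ; save_gprs
       lgr   %r1,%r15              ; keep incoming sp in temp_reg
       aghi  %r15,-160             ; decrement stack pointer
       stg   %r1,0(%r15)           ; set backchain (TARGET_BACKCHAIN)

   with frame-related notes attached so the CFI describes each step.  */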

/* Expand the epilogue into a bunch of separate insns.  */

void
s390_emit_epilogue (bool sibcall)
{
  rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
  int area_bottom, area_top, offset = 0;
  int next_offset;
  rtvec p;
  int i;

  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function
	 exit intercept to facilitate the use of tracing
	 algorithms located at the branch target.  */

      /* Emit a blockage here so that all code
	 lies between the profiling mechanisms.  */
      emit_insn (gen_blockage ());

      emit_insn (gen_epilogue_tpf ());
    }

  /* Check whether to use frame or stack pointer for restore.  */

  frame_pointer = (frame_pointer_needed
		   ? hard_frame_pointer_rtx : stack_pointer_rtx);

  s390_frame_area (&area_bottom, &area_top);

  /* Check whether we can access the register save area.
     If not, increment the frame pointer as required.  */

  if (area_top <= area_bottom)
    {
      /* Nothing to restore.  */
    }
  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
    {
      /* Area is in range.  */
      offset = cfun_frame_layout.frame_size;
    }
  else
    {
      rtx insn, frame_off, cfa;

      offset = area_bottom < 0 ? -area_bottom : 0;
      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);

      cfa = gen_rtx_SET (VOIDmode, frame_pointer,
			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
      if (DISP_IN_RANGE (INTVAL (frame_off)))
	{
	  insn = gen_rtx_SET (VOIDmode, frame_pointer,
			      gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
	  insn = emit_insn (insn);
	}
      else
	{
	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
	    frame_off = force_const_mem (Pmode, frame_off);

	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
	  annotate_constant_pool_refs (&PATTERN (insn));
	}
      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Restore call-saved fprs.  */

  if (TARGET_64BIT)
    {
      if (cfun_save_high_fprs_p)
	{
	  next_offset = cfun_frame_layout.f8_offset;
	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
	    {
	      if (cfun_fpr_save_p (i))
		{
		  restore_fpr (frame_pointer,
			       offset + next_offset, i);
		  cfa_restores
		    = alloc_reg_note (REG_CFA_RESTORE,
				      gen_rtx_REG (DFmode, i), cfa_restores);
		  next_offset += 8;
		}
	    }
	}
    }
  else
    {
      next_offset = cfun_frame_layout.f4_offset;
      /* f4, f6 */
      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
	{
	  if (cfun_fpr_save_p (i))
	    {
	      restore_fpr (frame_pointer,
			   offset + next_offset, i);
	      cfa_restores
		= alloc_reg_note (REG_CFA_RESTORE,
				  gen_rtx_REG (DFmode, i), cfa_restores);
	      next_offset += 8;
	    }
	  else if (!TARGET_PACKED_STACK)
	    next_offset += 8;
	}
    }

  /* Return register.  */

  return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);

  /* Restore call-saved gprs.  */

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      rtx insn, addr;
      int i;

      /* Check for global registers and save them
	 to the stack location from which they get restored.  */

      for (i = cfun_frame_layout.first_restore_gpr;
	   i <= cfun_frame_layout.last_restore_gpr;
	   i++)
	{
	  if (global_not_special_regno_p (i))
	    {
	      addr = plus_constant (Pmode, frame_pointer,
				    offset + cfun_frame_layout.gprs_offset
				    + (i - cfun_frame_layout.first_save_gpr_slot)
				    * UNITS_PER_LONG);
	      addr = gen_rtx_MEM (Pmode, addr);
	      set_mem_alias_set (addr, get_frame_alias_set ());
	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
	    }
	  else
	    cfa_restores
	      = alloc_reg_note (REG_CFA_RESTORE,
				gen_rtx_REG (Pmode, i), cfa_restores);
	}

      if (! sibcall)
	{
	  /* Fetch the return address from the stack before the load
	     multiple; this helps scheduling.

	     Only do this if we already decided that r14 needs to be
	     saved to a stack slot.  (And not just because r14 happens
	     to be in between two GPRs which need saving.)  Otherwise
	     it would be difficult to take that decision back in
	     s390_optimize_prologue.  */
	  if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
	    {
	      int return_regnum = find_unused_clobbered_reg ();
	      if (!return_regnum)
		return_regnum = 4;
	      return_reg = gen_rtx_REG (Pmode, return_regnum);

	      addr = plus_constant (Pmode, frame_pointer,
				    offset + cfun_frame_layout.gprs_offset
				    + (RETURN_REGNUM
				       - cfun_frame_layout.first_save_gpr_slot)
				    * UNITS_PER_LONG);
	      addr = gen_rtx_MEM (Pmode, addr);
	      set_mem_alias_set (addr, get_frame_alias_set ());
	      emit_move_insn (return_reg, addr);

	      /* Once we have done this optimization, we have to make
		 sure s390_optimize_prologue does not try to remove the
		 store of r14 since we will not be able to find the
		 load issued here.  */
	      cfun_frame_layout.save_return_addr_p = true;
	    }
	}

      insn = restore_gprs (frame_pointer,
			   offset + cfun_frame_layout.gprs_offset
			   + (cfun_frame_layout.first_restore_gpr
			      - cfun_frame_layout.first_save_gpr_slot)
			   * UNITS_PER_LONG,
			   cfun_frame_layout.first_restore_gpr,
			   cfun_frame_layout.last_restore_gpr);
      insn = emit_insn (insn);
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  s390_restore_gprs_from_fprs ();

  if (! sibcall)
    {
      /* Return to caller.  */

      p = rtvec_alloc (2);

      RTVEC_ELT (p, 0) = ret_rtx;
      RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
      emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
    }
}

/* Implement TARGET_SET_UP_BY_PROLOGUE.  */

static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
{
  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}

/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}
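
/* Illustration: with shrink-wrapping, a function whose hot path needs
   no frame can return through a bare "br %r14" (simple_return), while
   the cold path still runs the full epilogue emitted above.  */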

/* Return true if the epilogue is guaranteed to contain only a return
   instruction and if a direct return can therefore be used instead.
   One of the main advantages of using direct return instructions
   is that we can then use conditional returns.  */

bool
s390_can_use_return_insn (void)
{
  int i;

  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;

  /* For 31-bit this is not covered by the frame_size check below
     since f4, f6 are saved in the register save area without needing
     additional stack space.  */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
}

/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */

static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl,
				      const_tree val)
{
  return ((TARGET_VX_ABI
	   && typelist == 0
	   && VECTOR_TYPE_P (TREE_TYPE (val))
	   && (funcdecl == NULL_TREE
	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
	  ? N_("Vector argument passed to unprototyped function")
	  : NULL);
}


/* Return the size in bytes of a function argument of
   type TYPE and/or mode MODE.  At least one of TYPE or
   MODE must be specified.  */

static int
s390_function_arg_size (machine_mode mode, const_tree type)
{
  if (type)
    return int_size_in_bytes (type);

  /* No type info available for some library calls ...  */
  if (mode != BLKmode)
    return GET_MODE_SIZE (mode);

  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
}

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a vector register, if available.  */

bool
s390_function_arg_vector (machine_mode mode, const_tree type)
{
  if (!TARGET_VX_ABI)
    return false;

  if (s390_function_arg_size (mode, type) > 16)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return VECTOR_MODE_P (mode);

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (single == NULL_TREE)
	    single = TREE_TYPE (field);
	  else
	    return false;
	}

      if (single == NULL_TREE)
	return false;
      else
	{
	  /* If the field declaration adds extra bytes due to
	     e.g. padding, this is not accepted as a vector type.  */
	  if (int_size_in_bytes (single) <= 0
	      || int_size_in_bytes (single) != int_size_in_bytes (type))
	    return false;
	  type = single;
	}
    }

  return VECTOR_TYPE_P (type);
}

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a floating-point register, if available.  */

static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
  if (s390_function_arg_size (mode, type) > 8)
    return false;

  /* Soft-float changes the ABI: no floating-point registers are used.  */
  if (TARGET_SOFT_FLOAT)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (single == NULL_TREE)
	    single = TREE_TYPE (field);
	  else
	    return false;
	}

      if (single == NULL_TREE)
	return false;
      else
	type = single;
    }

  return TREE_CODE (type) == REAL_TYPE;
}

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in an integer register, or a pair of integer
   registers, if available.  */

static bool
s390_function_arg_integer (machine_mode mode, const_tree type)
{
  int size = s390_function_arg_size (mode, type);
  if (size > 8)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return GET_MODE_CLASS (mode) == MODE_INT
	   || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));

  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == NULLPTR_TYPE
      || TREE_CODE (type) == OFFSET_TYPE
      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
    return true;

  /* We also accept structs of size 1, 2, 4, 8 that are not
     passed in floating-point registers.  */
  if (AGGREGATE_TYPE_P (type)
      && exact_log2 (size) >= 0
      && !s390_function_arg_float (mode, type))
    return true;

  return false;
}
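
/* Examples for the predicates above (illustrative):
   "struct { float x; }" is passed like a float in an FPR;
   "struct { int a; int b; }" (size 8, a power of 2) is passed in a
   GPR; "struct { char c[3]; }" (size 3) fails the exact_log2 test
   and is passed by reference.  */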

/* Return true if a function argument of type TYPE and mode MODE
   is to be passed by reference.  The ABI specifies that only
   structures of size 1, 2, 4, or 8 bytes are passed by value,
   all other structures (and complex numbers) are passed by
   reference.  */

static bool
s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
			machine_mode mode, const_tree type,
			bool named ATTRIBUTE_UNUSED)
{
  int size = s390_function_arg_size (mode, type);

  if (s390_function_arg_vector (mode, type))
    return false;

  if (size > 8)
    return true;

  if (type)
    {
      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
	return true;

      if (TREE_CODE (type) == COMPLEX_TYPE
	  || TREE_CODE (type) == VECTOR_TYPE)
	return true;
    }

  return false;
}

/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  The boolean NAMED specifies whether the
   argument is a named argument (as opposed to an unnamed argument
   matching an ellipsis).  */

static void
s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (s390_function_arg_vector (mode, type))
    {
      /* We are called for unnamed vector stdarg arguments which are
	 passed on the stack.  In this case this hook does not have to
	 do anything since stack arguments are tracked by common
	 code.  */
      if (!named)
	return;
      cum->vrs += 1;
    }
  else if (s390_function_arg_float (mode, type))
    {
      cum->fprs += 1;
    }
  else if (s390_function_arg_integer (mode, type))
    {
      int size = s390_function_arg_size (mode, type);
      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
    }
  else
    gcc_unreachable ();
}
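
/* An illustrative example (register assignments assumed): for
   "void f (int a, double b, long c)" on 64 bit, a is passed in %r2,
   b in %f0 and c in %r3; with -m31, a DImode argument instead needs
   n_gprs == 2 and is described by the PARALLEL built below.  */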

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).

   On S/390, we use general purpose registers 2 through 6 to
   pass integer, pointer, and certain structure arguments, and
   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
   to pass floating point arguments.  All remaining arguments
   are pushed to the stack.  */

static rtx
s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!named)
    s390_check_type_for_vector_abi (type, true, false);

  if (s390_function_arg_vector (mode, type))
    {
      /* Vector arguments being part of the ellipsis are passed on the
	 stack.  */
      if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
	return NULL_RTX;

      return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
    }
  else if (s390_function_arg_float (mode, type))
    {
      if (cum->fprs + 1 > FP_ARG_NUM_REG)
	return NULL_RTX;
      else
	return gen_rtx_REG (mode, cum->fprs + 16);
    }
  else if (s390_function_arg_integer (mode, type))
    {
      int size = s390_function_arg_size (mode, type);
      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
	return NULL_RTX;
      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
	return gen_rtx_REG (mode, cum->gprs + 2);
      else if (n_gprs == 2)
	{
	  rtvec p = rtvec_alloc (2);

	  RTVEC_ELT (p, 0)
	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
				 const0_rtx);
	  RTVEC_ELT (p, 1)
	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
				 GEN_INT (4));

	  return gen_rtx_PARALLEL (mode, p);
	}
    }

  /* After the real arguments, expand_call calls us once again
     with a void_type_node type.  Whatever we return here is
     passed as operand 2 to the call expanders.

     We don't need this feature ...  */
  else if (type == void_type_node)
    return const0_rtx;

  gcc_unreachable ();
}

/* Return true if return values of type TYPE should be returned
   in a memory buffer whose address is passed by the caller as
   hidden first argument.  */

static bool
s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
{
  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == OFFSET_TYPE
      || TREE_CODE (type) == REAL_TYPE)
    return int_size_in_bytes (type) > 8;

  /* Vector types which fit into a VR.  */
  if (TARGET_VX_ABI
      && VECTOR_TYPE_P (type)
      && int_size_in_bytes (type) <= 16)
    return false;

  /* Aggregates and similar constructs are always returned
     in memory.  */
  if (AGGREGATE_TYPE_P (type)
      || TREE_CODE (type) == COMPLEX_TYPE
      || VECTOR_TYPE_P (type))
    return true;

  /* ??? We get called on all sorts of random stuff from
     aggregate_value_p.  We can't abort, but it's not clear
     what's safe to return.  Pretend it's a struct I guess.  */
  return true;
}

/* Function arguments and return values are promoted to word size.  */

static machine_mode
s390_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp,
			    const_tree fntype ATTRIBUTE_UNUSED,
			    int for_return ATTRIBUTE_UNUSED)
{
  if (INTEGRAL_MODE_P (mode)
      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
    {
      if (type != NULL_TREE && POINTER_TYPE_P (type))
	*punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }

  return mode;
}
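
/* For illustration: on 64 bit, UNITS_PER_LONG is 8, so an SImode
   argument or return value is promoted to DImode (Pmode), and pointer
   types are extended according to POINTERS_EXTEND_UNSIGNED.  */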

/* Define where to return a (scalar) value of type RET_TYPE.
   If RET_TYPE is null, define where to return a (scalar)
   value of mode MODE from a libcall.  */

static rtx
s390_function_and_libcall_value (machine_mode mode,
				 const_tree ret_type,
				 const_tree fntype_or_decl,
				 bool outgoing ATTRIBUTE_UNUSED)
{
  /* For vector return types it is important to use the RET_TYPE
     argument whenever available since the middle-end might have
     changed the mode to a scalar mode.  */
  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
			    || (!ret_type && VECTOR_MODE_P (mode)));

  /* For normal functions perform the promotion as
     promote_function_mode would do.  */
  if (ret_type)
    {
      int unsignedp = TYPE_UNSIGNED (ret_type);
      mode = promote_function_mode (ret_type, mode, &unsignedp,
				    fntype_or_decl, 1);
    }

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || SCALAR_FLOAT_MODE_P (mode)
	      || (TARGET_VX_ABI && vector_ret_type_p));
  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));

  if (TARGET_VX_ABI && vector_ret_type_p)
    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
    return gen_rtx_REG (mode, 16);
  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
	   || UNITS_PER_LONG == UNITS_PER_WORD)
    return gen_rtx_REG (mode, 2);
  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
    {
      /* This case is triggered when returning a 64-bit value with
	 -m31 -mzarch.  Although the value would fit into a single
	 register it has to be forced into a 32-bit register pair in
	 order to match the ABI.  */
      rtvec p = rtvec_alloc (2);

      RTVEC_ELT (p, 0)
	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
      RTVEC_ELT (p, 1)
	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));

      return gen_rtx_PARALLEL (mode, p);
    }

  gcc_unreachable ();
}

/* Define where to return a scalar return value of type RET_TYPE.  */

static rtx
s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
		     bool outgoing)
{
  return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
					  fn_decl_or_type, outgoing);
}

/* Define where to return a scalar libcall return value of mode
   MODE.  */

static rtx
s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return s390_function_and_libcall_value (mode, NULL_TREE,
					  NULL_TREE, true);
}
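
/* Examples for the cases above (illustrative): a "long" comes back in
   %r2, a "double" in %f0, and, with the vector ABI, a "vector int" in
   the first vector argument register; a 64-bit value under
   -m31 -mzarch is returned in the %r2/%r3 pair built above.  */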

/* Create and return the va_list datatype.

   On S/390, va_list is an array type equivalent to

      typedef struct __va_list_tag
	{
	  long __gpr;
	  long __fpr;
	  void *__overflow_arg_area;
	  void *__reg_save_area;
	} va_list[1];

   where __gpr and __fpr hold the number of general purpose
   or floating point arguments used up to now, respectively,
   __overflow_arg_area points to the stack location of the
   next argument passed on the stack, and __reg_save_area
   always points to the start of the register area in the
   call frame of the current function.  The function prologue
   saves all registers used for argument passing into this
   area if the function uses variable arguments.  */

static tree
s390_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);

  type_decl
    = build_decl (BUILTINS_LOCATION,
		  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("__gpr"),
		      long_integer_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("__fpr"),
		      long_integer_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("__reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
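
/* An assumed example of the layout at run time: after a call to
   "void f (int a, ...)", __gpr is 1 (one named GPR argument), __fpr
   is 0, __overflow_arg_area points just past the named stack
   arguments and __reg_save_area points to the register save area of
   f's frame.  */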

/* Implement va_start by filling the va_list structure VALIST.
   STDARG_P is always true, and ignored.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info:
       holds number of gprs and fprs used for named arguments.
     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

static void
s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT n_gpr, n_fpr;
  int off;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count the number of gp and fp argument registers used.  */

  n_gpr = crtl->args.info.gprs;
  n_fpr = crtl->args.info.fprs;

  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.
     FIXME: This currently is too pessimistic when the vector ABI is
     enabled.  In that case we *always* set up the overflow area
     pointer.  */
  if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
      || TARGET_VX_ABI)
    {
      t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);

      off = INTVAL (crtl->args.arg_offset_rtx);
      off = off < 0 ? 0 : off;
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
		 (int) n_gpr, (int) n_fpr, off);

      t = fold_build_pointer_plus_hwi (t, off);

      t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the register save area.  */
  if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
      || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
    {
      t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
      t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);

      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   Generates code equivalent to:

   if (integral value) {
     if (size <= 4 && args.gpr < 5 ||
	 size > 4 && args.gpr < 4)
       ret = args.reg_save_area[args.gpr+8]
     else
       ret = *args.overflow_arg_area++;
   } else if (vector value) {
     ret = *args.overflow_arg_area;
     args.overflow_arg_area += size / 8;
   } else if (float value) {
     if (args.fpr < 2)
       ret = args.reg_save_area[args.fpr+64]
     else
       ret = *args.overflow_arg_area++;
   } else if (aggregate value) {
     if (args.gpr < 5)
       ret = *args.reg_save_area[args.gpr]
     else
       ret = **args.overflow_arg_area++;
   }  */

static tree
s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
  tree lab_false, lab_over;
  tree addr = create_tmp_var (ptr_type_node, "addr");
  bool left_align_p;		/* How a value < UNITS_PER_LONG is aligned
				   within a stack slot.  */

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* The tree for args* cannot be shared between gpr/fpr and ovf since
     both appear on a lhs.  */
  valist = unshare_expr (valist);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);

  size = int_size_in_bytes (type);

  s390_check_type_for_vector_abi (type, true, false);

  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "va_arg: aggregate type");
	  debug_tree (type);
	}

      /* Aggregates are passed by reference.  */
      indirect_p = 1;
      reg = gpr;
      n_reg = 1;

      /* Kernel stack layout on 31 bit: it is assumed here that no
	 padding will be added by s390_frame_info because for va_args
	 an even number of gprs always has to be saved: r15-r2 = 14
	 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;
      sav_scale = UNITS_PER_LONG;
      size = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else if (s390_function_arg_vector (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "va_arg: vector type");
	  debug_tree (type);
	}

      indirect_p = 0;
      reg = NULL_TREE;
      n_reg = 0;
      sav_ofs = 0;
      sav_scale = 8;
      max_reg = 0;
      left_align_p = true;
    }
  else if (s390_function_arg_float (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "va_arg: float type");
	  debug_tree (type);
	}

      /* FP args go in FP registers, if present.  */
      indirect_p = 0;
      reg = fpr;
      n_reg = 1;
      sav_ofs = 16 * UNITS_PER_LONG;
      sav_scale = 8;
      max_reg = FP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else
    {
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "va_arg: other type");
	  debug_tree (type);
	}

      /* Otherwise into GP registers.  */
      indirect_p = 0;
      reg = gpr;
      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      /* Kernel stack layout on 31 bit: it is assumed here that no
	 padding will be added by s390_frame_info because for va_args
	 an even number of gprs always has to be saved: r15-r2 = 14
	 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;

      if (size < UNITS_PER_LONG)
	sav_ofs += UNITS_PER_LONG - size;

      sav_scale = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }

  /* Pull the value out of the saved registers ...  */
*/ 11707 11708 if (reg != NULL_TREE) 11709 { 11710 /* 11711 if (reg > ((typeof (reg))max_reg)) 11712 goto lab_false; 11713 11714 addr = sav + sav_ofs + reg * sav_scale; 11715 11716 goto lab_over; 11717 11718 lab_false: 11719 */ 11720 11721 lab_false = create_artificial_label (UNKNOWN_LOCATION); 11722 lab_over = create_artificial_label (UNKNOWN_LOCATION); 11723 11724 t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); 11725 t = build2 (GT_EXPR, boolean_type_node, reg, t); 11726 u = build1 (GOTO_EXPR, void_type_node, lab_false); 11727 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); 11728 gimplify_and_add (t, pre_p); 11729 11730 t = fold_build_pointer_plus_hwi (sav, sav_ofs); 11731 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, 11732 fold_convert (TREE_TYPE (reg), size_int (sav_scale))); 11733 t = fold_build_pointer_plus (t, u); 11734 11735 gimplify_assign (addr, t, pre_p); 11736 11737 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); 11738 11739 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); 11740 } 11741 11742 /* ... Otherwise out of the overflow area. */ 11743 11744 t = ovf; 11745 if (size < UNITS_PER_LONG && !left_align_p) 11746 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size); 11747 11748 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 11749 11750 gimplify_assign (addr, t, pre_p); 11751 11752 if (size < UNITS_PER_LONG && left_align_p) 11753 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG); 11754 else 11755 t = fold_build_pointer_plus_hwi (t, size); 11756 11757 gimplify_assign (ovf, t, pre_p); 11758 11759 if (reg != NULL_TREE) 11760 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); 11761 11762 11763 /* Increment register save count. */ 11764 11765 if (n_reg > 0) 11766 { 11767 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, 11768 fold_convert (TREE_TYPE (reg), size_int (n_reg))); 11769 gimplify_and_add (u, pre_p); 11770 } 11771 11772 if (indirect_p) 11773 { 11774 t = build_pointer_type_for_mode (build_pointer_type (type), 11775 ptr_mode, true); 11776 addr = fold_convert (t, addr); 11777 addr = build_va_arg_indirect_ref (addr); 11778 } 11779 else 11780 { 11781 t = build_pointer_type_for_mode (type, ptr_mode, true); 11782 addr = fold_convert (t, addr); 11783 } 11784 11785 return build_va_arg_indirect_ref (addr); 11786} 11787 11788/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) 11789 expanders. 11790 DEST - Register location where CC will be stored. 11791 TDB - Pointer to a 256 byte area in which to store the transaction 11792 diagnostic block. NULL if TDB is not needed. 11793 RETRY - Retry count value. If non-NULL, a retry loop for CC2 11794 is emitted. 11795 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part 11796 of the tbegin instruction pattern.
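   As an editorial aid (not part of the original sources): through the
   -mhtm builtins backed by this expander, a transaction with a retry
   loop can be written roughly as follows, assuming GCC's documented
   __builtin_tbegin_retry/__builtin_tend interface and a sufficiently
   aligned 256 byte tdb buffer:

     #include <stdint.h>

     extern void fallback_with_lock (void);
     extern int shared_counter;

     void
     increment (void)
     {
       uint8_t tdb[256] __attribute__ ((aligned (8)));

       // CC0 (== 0) means the transaction has started; transient CC2
       // failures are retried by the expander's retry loop.
       if (__builtin_tbegin_retry (tdb, 5) == 0)
         {
           shared_counter++;      // transactional work
           __builtin_tend ();
         }
       else
         fallback_with_lock ();   // persistent failure path
     }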
*/ 11797 11798void 11799s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) 11800{ 11801 rtx retry_plus_two = gen_reg_rtx (SImode); 11802 rtx retry_reg = gen_reg_rtx (SImode); 11803 rtx_code_label *retry_label = NULL; 11804 11805 if (retry != NULL_RTX) 11806 { 11807 emit_move_insn (retry_reg, retry); 11808 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx)); 11809 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx)); 11810 retry_label = gen_label_rtx (); 11811 emit_label (retry_label); 11812 } 11813 11814 if (clobber_fprs_p) 11815 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb)); 11816 else 11817 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), 11818 tdb)); 11819 11820 emit_move_insn (dest, gen_rtx_UNSPEC (SImode, 11821 gen_rtvec (1, gen_rtx_REG (CCRAWmode, 11822 CC_REGNUM)), 11823 UNSPEC_CC_TO_INT)); 11824 if (retry != NULL_RTX) 11825 { 11826 const int CC0 = 1 << 3; 11827 const int CC1 = 1 << 2; 11828 const int CC3 = 1 << 0; 11829 rtx jump; 11830 rtx count = gen_reg_rtx (SImode); 11831 rtx_code_label *leave_label = gen_label_rtx (); 11832 11833 /* Exit for success and permanent failures. */ 11834 jump = s390_emit_jump (leave_label, 11835 gen_rtx_EQ (VOIDmode, 11836 gen_rtx_REG (CCRAWmode, CC_REGNUM), 11837 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3))); 11838 LABEL_NUSES (leave_label) = 1; 11839 11840 /* CC2 - transient failure. Perform retry with ppa. */ 11841 emit_move_insn (count, retry_plus_two); 11842 emit_insn (gen_subsi3 (count, count, retry_reg)); 11843 emit_insn (gen_tx_assist (count)); 11844 jump = emit_jump_insn (gen_doloop_si64 (retry_label, 11845 retry_reg, 11846 retry_reg)); 11847 JUMP_LABEL (jump) = retry_label; 11848 LABEL_NUSES (retry_label) = 1; 11849 emit_label (leave_label); 11850 } 11851} 11852 11853 11854/* Return the decl for the target specific builtin with the function 11855 code FCODE. */ 11856 11857static tree 11858s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED) 11859{ 11860 if (fcode >= S390_BUILTIN_MAX) 11861 return error_mark_node; 11862 11863 return s390_builtin_decls[fcode]; 11864} 11865 11866/* We call mcount before the function prologue. So a profiled leaf 11867 function should stay a leaf function. */ 11868 11869static bool 11870s390_keep_leaf_when_profiled () 11871{ 11872 return true; 11873} 11874 11875/* Output assembly code for the trampoline template to 11876 stdio stream FILE. 11877 11878 On S/390, we use gpr 1 internally in the trampoline code; 11879 gpr 0 is used to hold the static chain. */ 11880 11881static void 11882s390_asm_trampoline_template (FILE *file) 11883{ 11884 rtx op[2]; 11885 op[0] = gen_rtx_REG (Pmode, 0); 11886 op[1] = gen_rtx_REG (Pmode, 1); 11887 11888 if (TARGET_64BIT) 11889 { 11890 output_asm_insn ("basr\t%1,0", op); /* 2 byte */ 11891 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */ 11892 output_asm_insn ("br\t%1", op); /* 2 byte */ 11893 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10)); 11894 } 11895 else 11896 { 11897 output_asm_insn ("basr\t%1,0", op); /* 2 byte */ 11898 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */ 11899 output_asm_insn ("br\t%1", op); /* 2 byte */ 11900 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8)); 11901 } 11902} 11903 11904/* Emit RTL insns to initialize the variable parts of a trampoline. 11905 FNADDR is an RTX for the address of the function's pure code. 11906 CXT is an RTX for the static chain value for the function. 
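   As an editorial illustration (offsets follow the 64-bit template
   emitted by s390_asm_trampoline_template above, with UNITS_PER_LONG
   assumed to be 8):

     offset  0: basr %r1,0            # %r1 <- address of next insn
     offset  2: lmg  %r0,%r1,14(%r1)  # %r0 <- chain, %r1 <- fnaddr
     offset  8: br   %r1              # branch to the target function
     offset 16: <static chain value>  # stored by the code below
     offset 24: <function address>    # stored by the code below

   The lmg displacement of 14 is relative to %r1 == trampoline + 2, so
   it reads exactly the two variable slots written at offsets 16 and 24.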
*/ 11907 11908static void 11909s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 11910{ 11911 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 11912 rtx mem; 11913 11914 emit_block_move (m_tramp, assemble_trampoline_template (), 11915 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL); 11916 11917 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG); 11918 emit_move_insn (mem, cxt); 11919 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG); 11920 emit_move_insn (mem, fnaddr); 11921} 11922 11923/* Output assembler code to FILE to increment profiler label # LABELNO 11924 for profiling a function entry. */ 11925 11926void 11927s390_function_profiler (FILE *file, int labelno) 11928{ 11929 rtx op[7]; 11930 11931 char label[128]; 11932 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno); 11933 11934 fprintf (file, "# function profiler \n"); 11935 11936 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM); 11937 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 11938 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG)); 11939 11940 op[2] = gen_rtx_REG (Pmode, 1); 11941 op[3] = gen_rtx_SYMBOL_REF (Pmode, label); 11942 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL; 11943 11944 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); 11945 if (flag_pic) 11946 { 11947 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT); 11948 op[4] = gen_rtx_CONST (Pmode, op[4]); 11949 } 11950 11951 if (TARGET_64BIT) 11952 { 11953 output_asm_insn ("stg\t%0,%1", op); 11954 output_asm_insn ("larl\t%2,%3", op); 11955 output_asm_insn ("brasl\t%0,%4", op); 11956 output_asm_insn ("lg\t%0,%1", op); 11957 } 11958 else if (!flag_pic) 11959 { 11960 op[6] = gen_label_rtx (); 11961 11962 output_asm_insn ("st\t%0,%1", op); 11963 output_asm_insn ("bras\t%2,%l6", op); 11964 output_asm_insn (".long\t%4", op); 11965 output_asm_insn (".long\t%3", op); 11966 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6])); 11967 output_asm_insn ("l\t%0,0(%2)", op); 11968 output_asm_insn ("l\t%2,4(%2)", op); 11969 output_asm_insn ("basr\t%0,%0", op); 11970 output_asm_insn ("l\t%0,%1", op); 11971 } 11972 else 11973 { 11974 op[5] = gen_label_rtx (); 11975 op[6] = gen_label_rtx (); 11976 11977 output_asm_insn ("st\t%0,%1", op); 11978 output_asm_insn ("bras\t%2,%l6", op); 11979 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5])); 11980 output_asm_insn (".long\t%4-%l5", op); 11981 output_asm_insn (".long\t%3-%l5", op); 11982 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6])); 11983 output_asm_insn ("lr\t%0,%2", op); 11984 output_asm_insn ("a\t%0,0(%2)", op); 11985 output_asm_insn ("a\t%2,4(%2)", op); 11986 output_asm_insn ("basr\t%0,%0", op); 11987 output_asm_insn ("l\t%0,%1", op); 11988 } 11989} 11990 11991/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF 11992 into its SYMBOL_REF_FLAGS. */ 11993 11994static void 11995s390_encode_section_info (tree decl, rtx rtl, int first) 11996{ 11997 default_encode_section_info (decl, rtl, first); 11998 11999 if (TREE_CODE (decl) == VAR_DECL) 12000 { 12001 /* If a variable has a forced alignment to < 2 bytes, mark it 12002 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL 12003 operand. 
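   (Editorial example: larl forms PC-relative addresses in halfword
   units and can therefore only reach even addresses, so a declaration
   with a user-forced alignment below 2 bytes such as

     char c __attribute__ ((aligned (1)));

   may land on an odd address and must not be accessed through larl;
   the flag set below makes sure such symbols go through the literal
   pool or GOT instead.)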
*/ 12004 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16) 12005 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; 12006 if (!DECL_SIZE (decl) 12007 || !DECL_ALIGN (decl) 12008 || !tree_fits_shwi_p (DECL_SIZE (decl)) 12009 || (DECL_ALIGN (decl) <= 64 12010 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl)))) 12011 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED; 12012 } 12013 12014 /* Literal pool references don't have a decl so they are handled 12015 differently here. We rely on the information in the MEM_ALIGN 12016 entry to decide upon natural alignment. */ 12017 if (MEM_P (rtl) 12018 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF 12019 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)) 12020 && (MEM_ALIGN (rtl) == 0 12021 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0 12022 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl)))) 12023 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED; 12024} 12025 12026/* Output thunk to FILE that implements a C++ virtual function call (with 12027 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer 12028 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment 12029 stored at VCALL_OFFSET in the vtable whose address is located at offset 0 12030 relative to the resulting this pointer. */ 12031 12032static void 12033s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, 12034 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 12035 tree function) 12036{ 12037 rtx op[10]; 12038 int nonlocal = 0; 12039 12040 /* Make sure unwind info is emitted for the thunk if needed. */ 12041 final_start_function (emit_barrier (), file, 1); 12042 12043 /* Operand 0 is the target function. */ 12044 op[0] = XEXP (DECL_RTL (function), 0); 12045 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0])) 12046 { 12047 nonlocal = 1; 12048 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), 12049 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT); 12050 op[0] = gen_rtx_CONST (Pmode, op[0]); 12051 } 12052 12053 /* Operand 1 is the 'this' pointer. */ 12054 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 12055 op[1] = gen_rtx_REG (Pmode, 3); 12056 else 12057 op[1] = gen_rtx_REG (Pmode, 2); 12058 12059 /* Operand 2 is the delta. */ 12060 op[2] = GEN_INT (delta); 12061 12062 /* Operand 3 is the vcall_offset. */ 12063 op[3] = GEN_INT (vcall_offset); 12064 12065 /* Operand 4 is the temporary register. */ 12066 op[4] = gen_rtx_REG (Pmode, 1); 12067 12068 /* Operands 5 to 8 can be used as labels. */ 12069 op[5] = NULL_RTX; 12070 op[6] = NULL_RTX; 12071 op[7] = NULL_RTX; 12072 op[8] = NULL_RTX; 12073 12074 /* Operand 9 can be used for temporary register. */ 12075 op[9] = NULL_RTX; 12076 12077 /* Generate code. */ 12078 if (TARGET_64BIT) 12079 { 12080 /* Setup literal pool pointer if required. */ 12081 if ((!DISP_IN_RANGE (delta) 12082 && !CONST_OK_FOR_K (delta) 12083 && !CONST_OK_FOR_Os (delta)) 12084 || (!DISP_IN_RANGE (vcall_offset) 12085 && !CONST_OK_FOR_K (vcall_offset) 12086 && !CONST_OK_FOR_Os (vcall_offset))) 12087 { 12088 op[5] = gen_label_rtx (); 12089 output_asm_insn ("larl\t%4,%5", op); 12090 } 12091 12092 /* Add DELTA to this pointer. 
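     As a rough editorial guide to the ladder below: la accepts an
     unsigned 12 bit displacement, lay a signed 20 bit displacement,
     aghi a signed 16 bit immediate and agfi a signed 32 bit immediate;
     a delta outside all of these ranges is added in from the literal
     pool entry emitted at the op[6] label, relative to the base label
     set up above.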
*/ 12093 if (delta) 12094 { 12095 if (CONST_OK_FOR_J (delta)) 12096 output_asm_insn ("la\t%1,%2(%1)", op); 12097 else if (DISP_IN_RANGE (delta)) 12098 output_asm_insn ("lay\t%1,%2(%1)", op); 12099 else if (CONST_OK_FOR_K (delta)) 12100 output_asm_insn ("aghi\t%1,%2", op); 12101 else if (CONST_OK_FOR_Os (delta)) 12102 output_asm_insn ("agfi\t%1,%2", op); 12103 else 12104 { 12105 op[6] = gen_label_rtx (); 12106 output_asm_insn ("agf\t%1,%6-%5(%4)", op); 12107 } 12108 } 12109 12110 /* Perform vcall adjustment. */ 12111 if (vcall_offset) 12112 { 12113 if (DISP_IN_RANGE (vcall_offset)) 12114 { 12115 output_asm_insn ("lg\t%4,0(%1)", op); 12116 output_asm_insn ("ag\t%1,%3(%4)", op); 12117 } 12118 else if (CONST_OK_FOR_K (vcall_offset)) 12119 { 12120 output_asm_insn ("lghi\t%4,%3", op); 12121 output_asm_insn ("ag\t%4,0(%1)", op); 12122 output_asm_insn ("ag\t%1,0(%4)", op); 12123 } 12124 else if (CONST_OK_FOR_Os (vcall_offset)) 12125 { 12126 output_asm_insn ("lgfi\t%4,%3", op); 12127 output_asm_insn ("ag\t%4,0(%1)", op); 12128 output_asm_insn ("ag\t%1,0(%4)", op); 12129 } 12130 else 12131 { 12132 op[7] = gen_label_rtx (); 12133 output_asm_insn ("llgf\t%4,%7-%5(%4)", op); 12134 output_asm_insn ("ag\t%4,0(%1)", op); 12135 output_asm_insn ("ag\t%1,0(%4)", op); 12136 } 12137 } 12138 12139 /* Jump to target. */ 12140 output_asm_insn ("jg\t%0", op); 12141 12142 /* Output literal pool if required. */ 12143 if (op[5]) 12144 { 12145 output_asm_insn (".align\t4", op); 12146 targetm.asm_out.internal_label (file, "L", 12147 CODE_LABEL_NUMBER (op[5])); 12148 } 12149 if (op[6]) 12150 { 12151 targetm.asm_out.internal_label (file, "L", 12152 CODE_LABEL_NUMBER (op[6])); 12153 output_asm_insn (".long\t%2", op); 12154 } 12155 if (op[7]) 12156 { 12157 targetm.asm_out.internal_label (file, "L", 12158 CODE_LABEL_NUMBER (op[7])); 12159 output_asm_insn (".long\t%3", op); 12160 } 12161 } 12162 else 12163 { 12164 /* Setup base pointer if required. */ 12165 if (!vcall_offset 12166 || (!DISP_IN_RANGE (delta) 12167 && !CONST_OK_FOR_K (delta) 12168 && !CONST_OK_FOR_Os (delta)) 12169 || (!DISP_IN_RANGE (delta) 12170 && !CONST_OK_FOR_K (vcall_offset) 12171 && !CONST_OK_FOR_Os (vcall_offset))) 12172 { 12173 op[5] = gen_label_rtx (); 12174 output_asm_insn ("basr\t%4,0", op); 12175 targetm.asm_out.internal_label (file, "L", 12176 CODE_LABEL_NUMBER (op[5])); 12177 } 12178 12179 /* Add DELTA to this pointer. */ 12180 if (delta) 12181 { 12182 if (CONST_OK_FOR_J (delta)) 12183 output_asm_insn ("la\t%1,%2(%1)", op); 12184 else if (DISP_IN_RANGE (delta)) 12185 output_asm_insn ("lay\t%1,%2(%1)", op); 12186 else if (CONST_OK_FOR_K (delta)) 12187 output_asm_insn ("ahi\t%1,%2", op); 12188 else if (CONST_OK_FOR_Os (delta)) 12189 output_asm_insn ("afi\t%1,%2", op); 12190 else 12191 { 12192 op[6] = gen_label_rtx (); 12193 output_asm_insn ("a\t%1,%6-%5(%4)", op); 12194 } 12195 } 12196 12197 /* Perform vcall adjustment. 
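     In C terms the adjustment performed below is roughly (editorial
     sketch, 31 bit variant, so vtable entries are 4 bytes wide):

       this += *(int *) (*(char **) this + vcall_offset);

     i.e. the vtable pointer sits at offset 0 of the already
     delta-adjusted object and the extra displacement is fetched from
     VCALL_OFFSET within that vtable.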
*/ 12198 if (vcall_offset) 12199 { 12200 if (CONST_OK_FOR_J (vcall_offset)) 12201 { 12202 output_asm_insn ("l\t%4,0(%1)", op); 12203 output_asm_insn ("a\t%1,%3(%4)", op); 12204 } 12205 else if (DISP_IN_RANGE (vcall_offset)) 12206 { 12207 output_asm_insn ("l\t%4,0(%1)", op); 12208 output_asm_insn ("ay\t%1,%3(%4)", op); 12209 } 12210 else if (CONST_OK_FOR_K (vcall_offset)) 12211 { 12212 output_asm_insn ("lhi\t%4,%3", op); 12213 output_asm_insn ("a\t%4,0(%1)", op); 12214 output_asm_insn ("a\t%1,0(%4)", op); 12215 } 12216 else if (CONST_OK_FOR_Os (vcall_offset)) 12217 { 12218 output_asm_insn ("iilf\t%4,%3", op); 12219 output_asm_insn ("a\t%4,0(%1)", op); 12220 output_asm_insn ("a\t%1,0(%4)", op); 12221 } 12222 else 12223 { 12224 op[7] = gen_label_rtx (); 12225 output_asm_insn ("l\t%4,%7-%5(%4)", op); 12226 output_asm_insn ("a\t%4,0(%1)", op); 12227 output_asm_insn ("a\t%1,0(%4)", op); 12228 } 12229 12230 /* We had to clobber the base pointer register. 12231 Re-setup the base pointer (with a different base). */ 12232 op[5] = gen_label_rtx (); 12233 output_asm_insn ("basr\t%4,0", op); 12234 targetm.asm_out.internal_label (file, "L", 12235 CODE_LABEL_NUMBER (op[5])); 12236 } 12237 12238 /* Jump to target. */ 12239 op[8] = gen_label_rtx (); 12240 12241 if (!flag_pic) 12242 output_asm_insn ("l\t%4,%8-%5(%4)", op); 12243 else if (!nonlocal) 12244 output_asm_insn ("a\t%4,%8-%5(%4)", op); 12245 /* We cannot call through .plt, since .plt requires %r12 loaded. */ 12246 else if (flag_pic == 1) 12247 { 12248 output_asm_insn ("a\t%4,%8-%5(%4)", op); 12249 output_asm_insn ("l\t%4,%0(%4)", op); 12250 } 12251 else if (flag_pic == 2) 12252 { 12253 op[9] = gen_rtx_REG (Pmode, 0); 12254 output_asm_insn ("l\t%9,%8-4-%5(%4)", op); 12255 output_asm_insn ("a\t%4,%8-%5(%4)", op); 12256 output_asm_insn ("ar\t%4,%9", op); 12257 output_asm_insn ("l\t%4,0(%4)", op); 12258 } 12259 12260 output_asm_insn ("br\t%4", op); 12261 12262 /* Output literal pool. */ 12263 output_asm_insn (".align\t4", op); 12264 12265 if (nonlocal && flag_pic == 2) 12266 output_asm_insn (".long\t%0", op); 12267 if (nonlocal) 12268 { 12269 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 12270 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL; 12271 } 12272 12273 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8])); 12274 if (!flag_pic) 12275 output_asm_insn (".long\t%0", op); 12276 else 12277 output_asm_insn (".long\t%0-%5", op); 12278 12279 if (op[6]) 12280 { 12281 targetm.asm_out.internal_label (file, "L", 12282 CODE_LABEL_NUMBER (op[6])); 12283 output_asm_insn (".long\t%2", op); 12284 } 12285 if (op[7]) 12286 { 12287 targetm.asm_out.internal_label (file, "L", 12288 CODE_LABEL_NUMBER (op[7])); 12289 output_asm_insn (".long\t%3", op); 12290 } 12291 } 12292 final_end_function (); 12293} 12294 12295static bool 12296s390_valid_pointer_mode (machine_mode mode) 12297{ 12298 return (mode == SImode || (TARGET_64BIT && mode == DImode)); 12299} 12300 12301/* Checks whether the given CALL_EXPR would use a call-saved 12302 register. This is used to decide whether sibling call 12303 optimization could be performed on the respective function 12304 call.
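   (Editorial example, not in the original sources: on 64-bit s390x the
   first five integer arguments travel in %r2 through %r6, and %r6 is
   call-saved, so a call such as

     extern long f (long, long, long, long, long);

     long
     g (long a, long b, long c, long d, long e)
     {
       return f (a, b, c, d, e);   // fifth argument lands in %r6
     }

   cannot become a sibling call, while the same tail call with only
   four arguments could.)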
*/ 12305 12306 static bool 12307 s390_call_saved_register_used (tree call_expr) 12308 { 12309 CUMULATIVE_ARGS cum_v; 12310 cumulative_args_t cum; 12311 tree parameter; 12312 machine_mode mode; 12313 tree type; 12314 rtx parm_rtx; 12315 int reg, i; 12316 12317 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0); 12318 cum = pack_cumulative_args (&cum_v); 12319 12320 for (i = 0; i < call_expr_nargs (call_expr); i++) 12321 { 12322 parameter = CALL_EXPR_ARG (call_expr, i); 12323 gcc_assert (parameter); 12324 12325 /* For an undeclared variable passed as parameter we will get 12326 an ERROR_MARK node here. */ 12327 if (TREE_CODE (parameter) == ERROR_MARK) 12328 return true; 12329 12330 type = TREE_TYPE (parameter); 12331 gcc_assert (type); 12332 12333 mode = TYPE_MODE (type); 12334 gcc_assert (mode); 12335 12336 /* We assume that in the target function all parameters are 12337 named. This only has an impact on vector argument register 12338 usage, none of which is call-saved. */ 12339 if (pass_by_reference (&cum_v, mode, type, true)) 12340 { 12341 mode = Pmode; 12342 type = build_pointer_type (type); 12343 } 12344 12345 parm_rtx = s390_function_arg (cum, mode, type, true); 12346 12347 s390_function_arg_advance (cum, mode, type, true); 12348 12349 if (!parm_rtx) 12350 continue; 12351 12352 if (REG_P (parm_rtx)) 12353 { 12354 for (reg = 0; 12355 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx)); 12356 reg++) 12357 if (!call_used_regs[reg + REGNO (parm_rtx)]) 12358 return true; 12359 } 12360 12361 if (GET_CODE (parm_rtx) == PARALLEL) 12362 { 12363 int i; 12364 12365 for (i = 0; i < XVECLEN (parm_rtx, 0); i++) 12366 { 12367 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0); 12368 12369 gcc_assert (REG_P (r)); 12370 12371 for (reg = 0; 12372 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r)); 12373 reg++) 12374 if (!call_used_regs[reg + REGNO (r)]) 12375 return true; 12376 } 12377 } 12378 12379 } 12380 return false; 12381} 12382 12383/* Return true if the given call expression can be 12384 turned into a sibling call. 12385 DECL holds the declaration of the function to be called whereas 12386 EXP is the call expression itself. */ 12387 12388static bool 12389s390_function_ok_for_sibcall (tree decl, tree exp) 12390{ 12391 /* The TPF epilogue uses register 1. */ 12392 if (TARGET_TPF_PROFILING) 12393 return false; 12394 12395 /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved) 12396 which would have to be restored before the sibcall. */ 12397 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl)) 12398 return false; 12399 12400 /* Register 6 on s390 is available as an argument register but is 12401 unfortunately call-saved. This makes functions needing this register 12402 for arguments not suitable for sibcalls. */ 12403 return !s390_call_saved_register_used (exp); 12404} 12405 12406/* Return the fixed registers used for condition codes. */ 12407 12408static bool 12409s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 12410{ 12411 *p1 = CC_REGNUM; 12412 *p2 = INVALID_REGNUM; 12413 12414 return true; 12415} 12416 12417/* This function is used by the call expanders of the machine description. 12418 It emits the call insn itself together with the necessary operations 12419 to adjust the target address and returns the emitted insn.
 12420 ADDR_LOCATION is the target address rtx 12421 TLS_CALL the location of the thread-local symbol 12422 RESULT_REG the register where the result of the call should be stored 12423 RETADDR_REG the register where the return address should be stored 12424 If this parameter is NULL_RTX the call is considered 12425 to be a sibling call. */ 12426 12427rtx_insn * 12428s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, 12429 rtx retaddr_reg) 12430{ 12431 bool plt_call = false; 12432 rtx_insn *insn; 12433 rtx call; 12434 rtx clobber; 12435 rtvec vec; 12436 12437 /* Direct function calls need special treatment. */ 12438 if (GET_CODE (addr_location) == SYMBOL_REF) 12439 { 12440 /* When calling a global routine in PIC mode, we must 12441 replace the symbol itself with the PLT stub. */ 12442 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location)) 12443 { 12444 if (TARGET_64BIT || retaddr_reg != NULL_RTX) 12445 { 12446 addr_location = gen_rtx_UNSPEC (Pmode, 12447 gen_rtvec (1, addr_location), 12448 UNSPEC_PLT); 12449 addr_location = gen_rtx_CONST (Pmode, addr_location); 12450 plt_call = true; 12451 } 12452 else 12453 /* For -fpic code the PLT entries might use r12 which is 12454 call-saved. Therefore we cannot do a sibcall when 12455 calling directly using a symbol ref. When reaching 12456 this point we decided (in s390_function_ok_for_sibcall) 12457 to do a sibcall for a function pointer but one of the 12458 optimizers was able to get rid of the function pointer 12459 by propagating the symbol ref into the call. This 12460 optimization is illegal for S/390 so we turn the direct 12461 call into an indirect call again. */ 12462 addr_location = force_reg (Pmode, addr_location); 12463 } 12464 12465 /* Unless we can use the bras(l) insn, force the 12466 routine address into a register. */ 12467 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH) 12468 { 12469 if (flag_pic) 12470 addr_location = legitimize_pic_address (addr_location, 0); 12471 else 12472 addr_location = force_reg (Pmode, addr_location); 12473 } 12474 } 12475 12476 /* If it is already an indirect call or the code above moved the 12477 SYMBOL_REF to somewhere else make sure the address can be found in 12478 register 1. */ 12479 if (retaddr_reg == NULL_RTX 12480 && GET_CODE (addr_location) != SYMBOL_REF 12481 && !plt_call) 12482 { 12483 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location); 12484 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM); 12485 } 12486 12487 addr_location = gen_rtx_MEM (QImode, addr_location); 12488 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); 12489 12490 if (result_reg != NULL_RTX) 12491 call = gen_rtx_SET (VOIDmode, result_reg, call); 12492 12493 if (retaddr_reg != NULL_RTX) 12494 { 12495 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg); 12496 12497 if (tls_call != NULL_RTX) 12498 vec = gen_rtvec (3, call, clobber, 12499 gen_rtx_USE (VOIDmode, tls_call)); 12500 else 12501 vec = gen_rtvec (2, call, clobber); 12502 12503 call = gen_rtx_PARALLEL (VOIDmode, vec); 12504 } 12505 12506 insn = emit_call_insn (call); 12507 12508 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */ 12509 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX) 12510 { 12511 /* s390_function_ok_for_sibcall should 12512 have denied sibcalls in this case. */ 12513 gcc_assert (retaddr_reg != NULL_RTX); 12514 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12)); 12515 } 12516 return insn; 12517} 12518 12519/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.
*/ 12520 12521 static void 12522 s390_conditional_register_usage (void) 12523 { 12524 int i; 12525 12526 if (flag_pic) 12527 { 12528 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 12529 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 12530 } 12531 if (TARGET_CPU_ZARCH) 12532 { 12533 fixed_regs[BASE_REGNUM] = 0; 12534 call_used_regs[BASE_REGNUM] = 0; 12535 fixed_regs[RETURN_REGNUM] = 0; 12536 call_used_regs[RETURN_REGNUM] = 0; 12537 } 12538 if (TARGET_64BIT) 12539 { 12540 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++) 12541 call_used_regs[i] = call_really_used_regs[i] = 0; 12542 } 12543 else 12544 { 12545 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0; 12546 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0; 12547 } 12548 12549 if (TARGET_SOFT_FLOAT) 12550 { 12551 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++) 12552 call_used_regs[i] = fixed_regs[i] = 1; 12553 } 12554 12555 /* Disable v16 - v31 for non-vector target. */ 12556 if (!TARGET_VX) 12557 { 12558 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++) 12559 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; 12560 } 12561} 12562 12563/* Corresponding function to eh_return expander. */ 12564 12565static GTY(()) rtx s390_tpf_eh_return_symbol; 12566void 12567s390_emit_tpf_eh_return (rtx target) 12568{ 12569 rtx_insn *insn; 12570 rtx reg, orig_ra; 12571 12572 if (!s390_tpf_eh_return_symbol) 12573 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return"); 12574 12575 reg = gen_rtx_REG (Pmode, 2); 12576 orig_ra = gen_rtx_REG (Pmode, 3); 12577 12578 emit_move_insn (reg, target); 12579 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM)); 12580 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg, 12581 gen_rtx_REG (Pmode, RETURN_REGNUM)); 12582 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); 12583 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra); 12584 12585 emit_move_insn (EH_RETURN_HANDLER_RTX, reg); 12586} 12587 12588/* Rework the prologue/epilogue to avoid saving/restoring 12589 registers unnecessarily. */ 12590 12591static void 12592s390_optimize_prologue (void) 12593{ 12594 rtx_insn *insn, *new_insn, *next_insn; 12595 12596 /* Do a final recompute of the frame-related data. */ 12597 s390_optimize_register_info (); 12598 12599 /* If all special registers are in fact used, there's nothing we 12600 can do, so no point in walking the insn list. */ 12601 12602 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM 12603 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM 12604 && (TARGET_CPU_ZARCH 12605 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM 12606 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM))) 12607 return; 12608 12609 /* Search for prologue/epilogue insns and replace them. */ 12610 12611 for (insn = get_insns (); insn; insn = next_insn) 12612 { 12613 int first, last, off; 12614 rtx set, base, offset; 12615 rtx pat; 12616 12617 next_insn = NEXT_INSN (insn); 12618 12619 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn)) 12620 continue; 12621 12622 pat = PATTERN (insn); 12623 12624 /* Remove ldgr/lgdr instructions used for saving and restoring 12625 GPRs if possible.
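     (Editorial note: ldgr/lgdr copy a 64 bit value between a GPR and
     an FPR, which lets the prologue park call-saved GPRs in
     call-clobbered FPRs instead of storing them to the stack; when
     register usage turns out smaller after the final recompute above,
     some of these copies become dead and are removed here.)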
*/ 12626 if (TARGET_Z10) 12627 { 12628 rtx tmp_pat = pat; 12629 12630 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr) 12631 tmp_pat = XVECEXP (pat, 0, 0); 12632 12633 if (GET_CODE (tmp_pat) == SET 12634 && GET_MODE (SET_SRC (tmp_pat)) == DImode 12635 && REG_P (SET_SRC (tmp_pat)) 12636 && REG_P (SET_DEST (tmp_pat))) 12637 { 12638 int src_regno = REGNO (SET_SRC (tmp_pat)); 12639 int dest_regno = REGNO (SET_DEST (tmp_pat)); 12640 int gpr_regno; 12641 int fpr_regno; 12642 12643 if (!((GENERAL_REGNO_P (src_regno) 12644 && FP_REGNO_P (dest_regno)) 12645 || (FP_REGNO_P (src_regno) 12646 && GENERAL_REGNO_P (dest_regno)))) 12647 continue; 12648 12649 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno; 12650 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno; 12651 12652 /* GPR must be call-saved, FPR must be call-clobbered. */ 12653 if (!call_really_used_regs[fpr_regno] 12654 || call_really_used_regs[gpr_regno]) 12655 continue; 12656 12657 /* It must not happen that what we once saved in an FPR now 12658 needs a stack slot. */ 12659 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK); 12660 12661 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE) 12662 { 12663 remove_insn (insn); 12664 continue; 12665 } 12666 } 12667 } 12668 12669 if (GET_CODE (pat) == PARALLEL 12670 && store_multiple_operation (pat, VOIDmode)) 12671 { 12672 set = XVECEXP (pat, 0, 0); 12673 first = REGNO (SET_SRC (set)); 12674 last = first + XVECLEN (pat, 0) - 1; 12675 offset = const0_rtx; 12676 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); 12677 off = INTVAL (offset); 12678 12679 if (GET_CODE (base) != REG || off < 0) 12680 continue; 12681 if (cfun_frame_layout.first_save_gpr != -1 12682 && (cfun_frame_layout.first_save_gpr < first 12683 || cfun_frame_layout.last_save_gpr > last)) 12684 continue; 12685 if (REGNO (base) != STACK_POINTER_REGNUM 12686 && REGNO (base) != HARD_FRAME_POINTER_REGNUM) 12687 continue; 12688 if (first > BASE_REGNUM || last < BASE_REGNUM) 12689 continue; 12690 12691 if (cfun_frame_layout.first_save_gpr != -1) 12692 { 12693 rtx s_pat = save_gprs (base, 12694 off + (cfun_frame_layout.first_save_gpr 12695 - first) * UNITS_PER_LONG, 12696 cfun_frame_layout.first_save_gpr, 12697 cfun_frame_layout.last_save_gpr); 12698 new_insn = emit_insn_before (s_pat, insn); 12699 INSN_ADDRESSES_NEW (new_insn, -1); 12700 } 12701 12702 remove_insn (insn); 12703 continue; 12704 } 12705 12706 if (cfun_frame_layout.first_save_gpr == -1 12707 && GET_CODE (pat) == SET 12708 && GENERAL_REG_P (SET_SRC (pat)) 12709 && GET_CODE (SET_DEST (pat)) == MEM) 12710 { 12711 set = pat; 12712 first = REGNO (SET_SRC (set)); 12713 offset = const0_rtx; 12714 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); 12715 off = INTVAL (offset); 12716 12717 if (GET_CODE (base) != REG || off < 0) 12718 continue; 12719 if (REGNO (base) != STACK_POINTER_REGNUM 12720 && REGNO (base) != HARD_FRAME_POINTER_REGNUM) 12721 continue; 12722 12723 remove_insn (insn); 12724 continue; 12725 } 12726 12727 if (GET_CODE (pat) == PARALLEL 12728 && load_multiple_operation (pat, VOIDmode)) 12729 { 12730 set = XVECEXP (pat, 0, 0); 12731 first = REGNO (SET_DEST (set)); 12732 last = first + XVECLEN (pat, 0) - 1; 12733 offset = const0_rtx; 12734 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); 12735 off = INTVAL (offset); 12736 12737 if (GET_CODE (base) != REG || off < 0) 12738 continue; 12739 12740 if (cfun_frame_layout.first_restore_gpr != -1 12741 && 
(cfun_frame_layout.first_restore_gpr < first 12742 || cfun_frame_layout.last_restore_gpr > last)) 12743 continue; 12744 if (REGNO (base) != STACK_POINTER_REGNUM 12745 && REGNO (base) != HARD_FRAME_POINTER_REGNUM) 12746 continue; 12747 if (first > BASE_REGNUM || last < BASE_REGNUM) 12748 continue; 12749 12750 if (cfun_frame_layout.first_restore_gpr != -1) 12751 { 12752 rtx rpat = restore_gprs (base, 12753 off + (cfun_frame_layout.first_restore_gpr 12754 - first) * UNITS_PER_LONG, 12755 cfun_frame_layout.first_restore_gpr, 12756 cfun_frame_layout.last_restore_gpr); 12757 12758 /* Remove REG_CFA_RESTOREs for registers that we no 12759 longer need to save. */ 12760 REG_NOTES (rpat) = REG_NOTES (insn); 12761 for (rtx *ptr = &REG_NOTES (rpat); *ptr; ) 12762 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE 12763 && ((int) REGNO (XEXP (*ptr, 0)) 12764 < cfun_frame_layout.first_restore_gpr)) 12765 *ptr = XEXP (*ptr, 1); 12766 else 12767 ptr = &XEXP (*ptr, 1); 12768 new_insn = emit_insn_before (rpat, insn); 12769 RTX_FRAME_RELATED_P (new_insn) = 1; 12770 INSN_ADDRESSES_NEW (new_insn, -1); 12771 } 12772 12773 remove_insn (insn); 12774 continue; 12775 } 12776 12777 if (cfun_frame_layout.first_restore_gpr == -1 12778 && GET_CODE (pat) == SET 12779 && GENERAL_REG_P (SET_DEST (pat)) 12780 && GET_CODE (SET_SRC (pat)) == MEM) 12781 { 12782 set = pat; 12783 first = REGNO (SET_DEST (set)); 12784 offset = const0_rtx; 12785 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); 12786 off = INTVAL (offset); 12787 12788 if (GET_CODE (base) != REG || off < 0) 12789 continue; 12790 12791 if (REGNO (base) != STACK_POINTER_REGNUM 12792 && REGNO (base) != HARD_FRAME_POINTER_REGNUM) 12793 continue; 12794 12795 remove_insn (insn); 12796 continue; 12797 } 12798 } 12799} 12800 12801/* On z10 and later the dynamic branch prediction must see the 12802 backward jump within a certain window. If not, it falls back to 12803 the static prediction. This function rearranges the loop backward 12804 branch in a way which makes the static prediction always correct. 12805 The function returns true if it added an instruction. */ 12806static bool 12807s390_fix_long_loop_prediction (rtx_insn *insn) 12808{ 12809 rtx set = single_set (insn); 12810 rtx code_label, label_ref, new_label; 12811 rtx_insn *uncond_jump; 12812 rtx_insn *cur_insn; 12813 rtx tmp; 12814 int distance; 12815 12816 /* This will exclude branch on count and branch on index patterns 12817 since these are correctly statically predicted. */ 12818 if (!set 12819 || SET_DEST (set) != pc_rtx 12820 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) 12821 return false; 12822 12823 /* Skip conditional returns. */ 12824 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1)) 12825 && XEXP (SET_SRC (set), 2) == pc_rtx) 12826 return false; 12827 12828 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12829 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2)); 12830 12831 gcc_assert (GET_CODE (label_ref) == LABEL_REF); 12832 12833 code_label = XEXP (label_ref, 0); 12834 12835 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1 12836 || INSN_ADDRESSES (INSN_UID (insn)) == -1 12837 || (INSN_ADDRESSES (INSN_UID (insn)) 12838 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE)) 12839 return false; 12840 12841 for (distance = 0, cur_insn = PREV_INSN (insn); 12842 distance < PREDICT_DISTANCE - 6; 12843 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn)) 12844 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn)) 12845 return false; 12846 12847 new_label = gen_label_rtx (); 12848 uncond_jump = emit_jump_insn_after ( 12849 gen_rtx_SET (VOIDmode, pc_rtx, 12850 gen_rtx_LABEL_REF (VOIDmode, code_label)), 12851 insn); 12852 emit_label_after (new_label, uncond_jump); 12853 12854 tmp = XEXP (SET_SRC (set), 1); 12855 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2); 12856 XEXP (SET_SRC (set), 2) = tmp; 12857 INSN_CODE (insn) = -1; 12858 12859 XEXP (label_ref, 0) = new_label; 12860 JUMP_LABEL (insn) = new_label; 12861 JUMP_LABEL (uncond_jump) = code_label; 12862 12863 return true; 12864} 12865 12866/* Returns 1 if INSN reads the value of REG for purposes not related 12867 to addressing of memory, and 0 otherwise. */ 12868static int 12869s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn) 12870{ 12871 return reg_referenced_p (reg, PATTERN (insn)) 12872 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn)); 12873} 12874 12875/* Starting from INSN find_cond_jump looks downwards in the insn 12876 stream for a single jump insn which is the last user of the 12877 condition code set in INSN. */ 12878static rtx_insn * 12879find_cond_jump (rtx_insn *insn) 12880{ 12881 for (; insn; insn = NEXT_INSN (insn)) 12882 { 12883 rtx ite, cc; 12884 12885 if (LABEL_P (insn)) 12886 break; 12887 12888 if (!JUMP_P (insn)) 12889 { 12890 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn)) 12891 break; 12892 continue; 12893 } 12894 12895 /* This will be triggered by a return. */ 12896 if (GET_CODE (PATTERN (insn)) != SET) 12897 break; 12898 12899 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx); 12900 ite = SET_SRC (PATTERN (insn)); 12901 12902 if (GET_CODE (ite) != IF_THEN_ELSE) 12903 break; 12904 12905 cc = XEXP (XEXP (ite, 0), 0); 12906 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))) 12907 break; 12908 12909 if (find_reg_note (insn, REG_DEAD, cc)) 12910 return insn; 12911 break; 12912 } 12913 12914 return NULL; 12915} 12916 12917/* Swap the condition in COND and the operands in OP0 and OP1 so that 12918 the semantics does not change. If NULL_RTX is passed as COND the 12919 function tries to find the conditional jump starting with INSN. */ 12920static void 12921s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn) 12922{ 12923 rtx tmp = *op0; 12924 12925 if (cond == NULL_RTX) 12926 { 12927 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn)); 12928 rtx set = jump ? single_set (jump) : NULL_RTX; 12929 12930 if (set == NULL_RTX) 12931 return; 12932 12933 cond = XEXP (SET_SRC (set), 0); 12934 } 12935 12936 *op0 = *op1; 12937 *op1 = tmp; 12938 PUT_CODE (cond, swap_condition (GET_CODE (cond))); 12939} 12940 12941/* On z10, instructions of the compare-and-branch family have the 12942 property to access the register occurring as second operand with 12943 its bits complemented. 
If such a compare is grouped with a second 12944 instruction that accesses the same register non-complemented, and 12945 if that register's value is delivered via a bypass, then the 12946 pipeline recycles, thereby causing significant performance decline. 12947 This function locates such situations and exchanges the two 12948 operands of the compare. The function returns true whenever it 12949 added an insn. */ 12950static bool 12951s390_z10_optimize_cmp (rtx_insn *insn) 12952{ 12953 rtx_insn *prev_insn, *next_insn; 12954 bool insn_added_p = false; 12955 rtx cond, *op0, *op1; 12956 12957 if (GET_CODE (PATTERN (insn)) == PARALLEL) 12958 { 12959 /* Handle compare and branch and branch on count 12960 instructions. */ 12961 rtx pattern = single_set (insn); 12962 12963 if (!pattern 12964 || SET_DEST (pattern) != pc_rtx 12965 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE) 12966 return false; 12967 12968 cond = XEXP (SET_SRC (pattern), 0); 12969 op0 = &XEXP (cond, 0); 12970 op1 = &XEXP (cond, 1); 12971 } 12972 else if (GET_CODE (PATTERN (insn)) == SET) 12973 { 12974 rtx src, dest; 12975 12976 /* Handle normal compare instructions. */ 12977 src = SET_SRC (PATTERN (insn)); 12978 dest = SET_DEST (PATTERN (insn)); 12979 12980 if (!REG_P (dest) 12981 || !CC_REGNO_P (REGNO (dest)) 12982 || GET_CODE (src) != COMPARE) 12983 return false; 12984 12985 /* s390_swap_cmp will try to find the conditional 12986 jump when passing NULL_RTX as condition. */ 12987 cond = NULL_RTX; 12988 op0 = &XEXP (src, 0); 12989 op1 = &XEXP (src, 1); 12990 } 12991 else 12992 return false; 12993 12994 if (!REG_P (*op0) || !REG_P (*op1)) 12995 return false; 12996 12997 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT) 12998 return false; 12999 13000 /* Swap the COMPARE arguments and its mask if there is a 13001 conflicting access in the previous insn. */ 13002 prev_insn = prev_active_insn (insn); 13003 if (prev_insn != NULL_RTX && INSN_P (prev_insn) 13004 && reg_referenced_p (*op1, PATTERN (prev_insn))) 13005 s390_swap_cmp (cond, op0, op1, insn); 13006 13007 /* Check if there is a conflict with the next insn. If there 13008 was no conflict with the previous insn, then swap the 13009 COMPARE arguments and its mask. If we already swapped 13010 the operands, or if swapping them would cause a conflict 13011 with the previous insn, issue a NOP after the COMPARE in 13012 order to separate the two instructions. */ 13013 next_insn = next_active_insn (insn); 13014 if (next_insn != NULL_RTX && INSN_P (next_insn) 13015 && s390_non_addr_reg_read_p (*op1, next_insn)) 13016 { 13017 if (prev_insn != NULL_RTX && INSN_P (prev_insn) 13018 && s390_non_addr_reg_read_p (*op0, prev_insn)) 13019 { 13020 if (REGNO (*op1) == 0) 13021 emit_insn_after (gen_nop1 (), insn); 13022 else 13023 emit_insn_after (gen_nop (), insn); 13024 insn_added_p = true; 13025 } 13026 else 13027 s390_swap_cmp (cond, op0, op1, insn); 13028 } 13029 return insn_added_p; 13030} 13031 13032/* Perform machine-dependent processing. */ 13033 13034static void 13035s390_reorg (void) 13036{ 13037 bool pool_overflow = false; 13038 int hw_before, hw_after; 13039 13040 /* Make sure all splits have been performed; splits after 13041 machine_dependent_reorg might confuse insn length counts. */ 13042 split_all_insns_noflow (); 13043 13044 /* Install the main literal pool and the associated base 13045 register load insns.
13046 13047 In addition, there are two problematic situations we need 13048 to correct: 13049 13050 - the literal pool might be > 4096 bytes in size, so that 13051 some of its elements cannot be directly accessed 13052 13053 - a branch target might be > 64K away from the branch, so that 13054 it is not possible to use a PC-relative instruction. 13055 13056 To fix those, we split the single literal pool into multiple 13057 pool chunks, reloading the pool base register at various 13058 points throughout the function to ensure it always points to 13059 the pool chunk the following code expects, and / or replace 13060 PC-relative branches by absolute branches. 13061 13062 However, the two problems are interdependent: splitting the 13063 literal pool can move a branch further away from its target, 13064 causing the 64K limit to overflow, and on the other hand, 13065 replacing a PC-relative branch by an absolute branch means 13066 we need to put the branch target address into the literal 13067 pool, possibly causing it to overflow. 13068 13069 So, we loop trying to fix up both problems until we manage 13070 to satisfy both conditions at the same time. Note that the 13071 loop is guaranteed to terminate as every pass of the loop 13072 strictly decreases the total number of PC-relative branches 13073 in the function. (This is not completely true as there 13074 might be branch-over-pool insns introduced by chunkify_start. 13075 Those never need to be split however.) */ 13076 13077 for (;;) 13078 { 13079 struct constant_pool *pool = NULL; 13080 13081 /* Collect the literal pool. */ 13082 if (!pool_overflow) 13083 { 13084 pool = s390_mainpool_start (); 13085 if (!pool) 13086 pool_overflow = true; 13087 } 13088 13089 /* If literal pool overflowed, start to chunkify it. */ 13090 if (pool_overflow) 13091 pool = s390_chunkify_start (); 13092 13093 /* Split out-of-range branches. If this has created new 13094 literal pool entries, cancel current chunk list and 13095 recompute it. zSeries machines have large branch 13096 instructions, so we never need to split a branch. */ 13097 if (!TARGET_CPU_ZARCH && s390_split_branches ()) 13098 { 13099 if (pool_overflow) 13100 s390_chunkify_cancel (pool); 13101 else 13102 s390_mainpool_cancel (pool); 13103 13104 continue; 13105 } 13106 13107 /* If we made it up to here, both conditions are satisfied. 13108 Finish up literal pool related changes. */ 13109 if (pool_overflow) 13110 s390_chunkify_finish (pool); 13111 else 13112 s390_mainpool_finish (pool); 13113 13114 /* We're done splitting branches. */ 13115 cfun->machine->split_branches_pending_p = false; 13116 break; 13117 } 13118 13119 /* Generate out-of-pool execute target insns. */ 13120 if (TARGET_CPU_ZARCH) 13121 { 13122 rtx_insn *insn, *target; 13123 rtx label; 13124 13125 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 13126 { 13127 label = s390_execute_label (insn); 13128 if (!label) 13129 continue; 13130 13131 gcc_assert (label != const0_rtx); 13132 13133 target = emit_label (XEXP (label, 0)); 13134 INSN_ADDRESSES_NEW (target, -1); 13135 13136 target = emit_insn (s390_execute_target (insn)); 13137 INSN_ADDRESSES_NEW (target, -1); 13138 } 13139 } 13140 13141 /* Try to optimize prologue and epilogue further. */ 13142 s390_optimize_prologue (); 13143 13144 /* Walk over the insns and do some >=z10 specific changes. 
*/ 13145 if (s390_tune == PROCESSOR_2097_Z10 13146 || s390_tune == PROCESSOR_2817_Z196 13147 || s390_tune == PROCESSOR_2827_ZEC12 13148 || s390_tune == PROCESSOR_2964_Z13) 13149 { 13150 rtx_insn *insn; 13151 bool insn_added_p = false; 13152 13153 /* The insn lengths and addresses have to be up to date for the 13154 following manipulations. */ 13155 shorten_branches (get_insns ()); 13156 13157 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 13158 { 13159 if (!INSN_P (insn) || INSN_CODE (insn) <= 0) 13160 continue; 13161 13162 if (JUMP_P (insn)) 13163 insn_added_p |= s390_fix_long_loop_prediction (insn); 13164 13165 if ((GET_CODE (PATTERN (insn)) == PARALLEL 13166 || GET_CODE (PATTERN (insn)) == SET) 13167 && s390_tune == PROCESSOR_2097_Z10) 13168 insn_added_p |= s390_z10_optimize_cmp (insn); 13169 } 13170 13171 /* Adjust branches if we added new instructions. */ 13172 if (insn_added_p) 13173 shorten_branches (get_insns ()); 13174 } 13175 13176 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after); 13177 if (hw_after > 0) 13178 { 13179 rtx_insn *insn; 13180 13181 /* Insert NOPs for hotpatching. */ 13182 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 13183 /* Emit NOPs 13184 1. inside the area covered by debug information to allow setting 13185 breakpoints at the NOPs, 13186 2. before any insn which results in an asm instruction, 13187 3. before in-function labels to avoid jumping to the NOPs, for 13188 example as part of a loop, 13189 4. before any barrier in case the function is completely empty 13190 (__builtin_unreachable ()) and has neither internal labels nor 13191 active insns. 13192 */ 13193 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn)) 13194 break; 13195 /* Output a series of NOPs before the first active insn. */ 13196 while (insn && hw_after > 0) 13197 { 13198 if (hw_after >= 3 && TARGET_CPU_ZARCH) 13199 { 13200 emit_insn_before (gen_nop_6_byte (), insn); 13201 hw_after -= 3; 13202 } 13203 else if (hw_after >= 2) 13204 { 13205 emit_insn_before (gen_nop_4_byte (), insn); 13206 hw_after -= 2; 13207 } 13208 else 13209 { 13210 emit_insn_before (gen_nop_2_byte (), insn); 13211 hw_after -= 1; 13212 } 13213 } 13214 } 13215} 13216 13217/* Return true if INSN is a fp load insn writing register REGNO. */ 13218static inline bool 13219s390_fpload_toreg (rtx_insn *insn, unsigned int regno) 13220{ 13221 rtx set; 13222 enum attr_type flag = s390_safe_attr_type (insn); 13223 13224 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF) 13225 return false; 13226 13227 set = single_set (insn); 13228 13229 if (set == NULL_RTX) 13230 return false; 13231 13232 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set))) 13233 return false; 13234 13235 if (REGNO (SET_DEST (set)) != regno) 13236 return false; 13237 13238 return true; 13239} 13240 13241/* This value describes the distance to be avoided between an 13242 arithmetic fp instruction and an fp load writing the same register. 13243 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is 13244 fine but the exact value has to be avoided. Otherwise the FP 13245 pipeline will throw an exception causing a major penalty. */ 13246#define Z10_EARLYLOAD_DISTANCE 7 13247 13248/* Rearrange the ready list in order to avoid the situation described 13249 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is 13250 moved to the very end of the ready list.
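   (Editorial note: the scheduler issues from the high end of READY, so
   moving the offending load to slot 0 delays it as long as possible;
   the walk below looks Z10_EARLYLOAD_DISTANCE - 1 active insns back
   from the last scheduled insn because issuing the load right now
   would recreate exactly the forbidden distance of 7.)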
*/ 13251static void 13252s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p) 13253{ 13254 unsigned int regno; 13255 int nready = *nready_p; 13256 rtx_insn *tmp; 13257 int i; 13258 rtx_insn *insn; 13259 rtx set; 13260 enum attr_type flag; 13261 int distance; 13262 13263 /* Skip DISTANCE - 1 active insns. */ 13264 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1; 13265 distance > 0 && insn != NULL_RTX; 13266 distance--, insn = prev_active_insn (insn)) 13267 if (CALL_P (insn) || JUMP_P (insn)) 13268 return; 13269 13270 if (insn == NULL_RTX) 13271 return; 13272 13273 set = single_set (insn); 13274 13275 if (set == NULL_RTX || !REG_P (SET_DEST (set)) 13276 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT) 13277 return; 13278 13279 flag = s390_safe_attr_type (insn); 13280 13281 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF) 13282 return; 13283 13284 regno = REGNO (SET_DEST (set)); 13285 i = nready - 1; 13286 13287 while (!s390_fpload_toreg (ready[i], regno) && i > 0) 13288 i--; 13289 13290 if (!i) 13291 return; 13292 13293 tmp = ready[i]; 13294 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i); 13295 ready[0] = tmp; 13296} 13297 13298 13299/* The s390_sched_state variable tracks the state of the current or 13300 the last instruction group. 13301 13302 0,1,2 number of instructions scheduled in the current group 13303 3 the last group is complete - normal insns 13304 4 the last group was a cracked/expanded insn */ 13305 13306static int s390_sched_state; 13307 13308#define S390_SCHED_STATE_NORMAL 3 13309#define S390_SCHED_STATE_CRACKED 4 13310 13311#define S390_SCHED_ATTR_MASK_CRACKED 0x1 13312#define S390_SCHED_ATTR_MASK_EXPANDED 0x2 13313#define S390_SCHED_ATTR_MASK_ENDGROUP 0x4 13314#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8 13315 13316static unsigned int 13317s390_get_sched_attrmask (rtx_insn *insn) 13318{ 13319 unsigned int mask = 0; 13320 13321 switch (s390_tune) 13322 { 13323 case PROCESSOR_2827_ZEC12: 13324 if (get_attr_zEC12_cracked (insn)) 13325 mask |= S390_SCHED_ATTR_MASK_CRACKED; 13326 if (get_attr_zEC12_expanded (insn)) 13327 mask |= S390_SCHED_ATTR_MASK_EXPANDED; 13328 if (get_attr_zEC12_endgroup (insn)) 13329 mask |= S390_SCHED_ATTR_MASK_ENDGROUP; 13330 if (get_attr_zEC12_groupalone (insn)) 13331 mask |= S390_SCHED_ATTR_MASK_GROUPALONE; 13332 break; 13333 case PROCESSOR_2964_Z13: 13334 if (get_attr_z13_cracked (insn)) 13335 mask |= S390_SCHED_ATTR_MASK_CRACKED; 13336 if (get_attr_z13_expanded (insn)) 13337 mask |= S390_SCHED_ATTR_MASK_EXPANDED; 13338 if (get_attr_z13_endgroup (insn)) 13339 mask |= S390_SCHED_ATTR_MASK_ENDGROUP; 13340 if (get_attr_z13_groupalone (insn)) 13341 mask |= S390_SCHED_ATTR_MASK_GROUPALONE; 13342 break; 13343 default: 13344 gcc_unreachable (); 13345 } 13346 return mask; 13347} 13348 13349static unsigned int 13350s390_get_unit_mask (rtx_insn *insn, int *units) 13351{ 13352 unsigned int mask = 0; 13353 13354 switch (s390_tune) 13355 { 13356 case PROCESSOR_2964_Z13: 13357 *units = 3; 13358 if (get_attr_z13_unit_lsu (insn)) 13359 mask |= 1 << 0; 13360 if (get_attr_z13_unit_fxu (insn)) 13361 mask |= 1 << 1; 13362 if (get_attr_z13_unit_vfu (insn)) 13363 mask |= 1 << 2; 13364 break; 13365 default: 13366 gcc_unreachable (); 13367 } 13368 return mask; 13369} 13370 13371/* Return the scheduling score for INSN. The higher the score the 13372 better. The score is calculated from the OOO scheduling attributes 13373 of INSN and the scheduling state s390_sched_state. 
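   For a feel of the z13 unit-mix bonus added at the end (editorial
   example; MAX_SCHED_MIX_SCORE and MAX_SCHED_MIX_DISTANCE are defined
   outside this excerpt, assume 8 and 100 for illustration): a unit
   last used 100 or more insns ago contributes the full 8, a unit used
   by the immediately preceding insn contributes 0, and one at distance
   50 contributes 50 * 8 / 100 == 4.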
*/ 13374static int 13375s390_sched_score (rtx_insn *insn) 13376{ 13377 unsigned int mask = s390_get_sched_attrmask (insn); 13378 int score = 0; 13379 13380 switch (s390_sched_state) 13381 { 13382 case 0: 13383 /* Try to put insns into the first slot which would otherwise 13384 break a group. */ 13385 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 13386 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) 13387 score += 5; 13388 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) 13389 score += 10; 13390 case 1: 13391 /* Prefer not cracked insns while trying to put together a 13392 group. */ 13393 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 13394 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 13395 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) 13396 score += 10; 13397 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0) 13398 score += 5; 13399 break; 13400 case 2: 13401 /* Prefer not cracked insns while trying to put together a 13402 group. */ 13403 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 13404 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 13405 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) 13406 score += 10; 13407 /* Prefer endgroup insns in the last slot. */ 13408 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0) 13409 score += 10; 13410 break; 13411 case S390_SCHED_STATE_NORMAL: 13412 /* Prefer not cracked insns if the last was not cracked. */ 13413 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 13414 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0) 13415 score += 5; 13416 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) 13417 score += 10; 13418 break; 13419 case S390_SCHED_STATE_CRACKED: 13420 /* Try to keep cracked insns together to prevent them from 13421 interrupting groups. */ 13422 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 13423 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) 13424 score += 5; 13425 break; 13426 } 13427 13428 if (s390_tune == PROCESSOR_2964_Z13) 13429 { 13430 int units, i; 13431 unsigned unit_mask, m = 1; 13432 13433 unit_mask = s390_get_unit_mask (insn, &units); 13434 gcc_assert (units <= MAX_SCHED_UNITS); 13435 13436 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long 13437 ago the last insn of this unit type got scheduled. This is 13438 supposed to help providing a proper instruction mix to the 13439 CPU. */ 13440 for (i = 0; i < units; i++, m <<= 1) 13441 if (m & unit_mask) 13442 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE / 13443 MAX_SCHED_MIX_DISTANCE); 13444 } 13445 return score; 13446} 13447 13448/* This function is called via hook TARGET_SCHED_REORDER before 13449 issuing one insn from list READY which contains *NREADYP entries. 13450 For target z10 it reorders load instructions to avoid early load 13451 conflicts in the floating point pipeline */ 13452static int 13453s390_sched_reorder (FILE *file, int verbose, 13454 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED) 13455{ 13456 if (s390_tune == PROCESSOR_2097_Z10) 13457 if (reload_completed && *nreadyp > 1) 13458 s390_z10_prevent_earlyload_conflicts (ready, nreadyp); 13459 13460 if ((s390_tune == PROCESSOR_2827_ZEC12 13461 || s390_tune == PROCESSOR_2964_Z13) 13462 && reload_completed 13463 && *nreadyp > 1) 13464 { 13465 int i; 13466 int last_index = *nreadyp - 1; 13467 int max_index = -1; 13468 int max_score = -1; 13469 rtx_insn *tmp; 13470 13471 /* Just move the insn with the highest score to the top (the 13472 end) of the list. A full sort is not needed since a conflict 13473 in the hazard recognition cannot happen. 
So the top insn in 13474 the ready list will always be taken. */ 13475 for (i = last_index; i >= 0; i--) 13476 { 13477 int score; 13478 13479 if (recog_memoized (ready[i]) < 0) 13480 continue; 13481 13482 score = s390_sched_score (ready[i]); 13483 if (score > max_score) 13484 { 13485 max_score = score; 13486 max_index = i; 13487 } 13488 } 13489 13490 if (max_index != -1) 13491 { 13492 if (max_index != last_index) 13493 { 13494 tmp = ready[max_index]; 13495 ready[max_index] = ready[last_index]; 13496 ready[last_index] = tmp; 13497 13498 if (verbose > 5) 13499 fprintf (file, 13500 ";;\t\tBACKEND: move insn %d to the top of list\n", 13501 INSN_UID (ready[last_index])); 13502 } 13503 else if (verbose > 5) 13504 fprintf (file, 13505 ";;\t\tBACKEND: best insn %d already on top\n", 13506 INSN_UID (ready[last_index])); 13507 } 13508 13509 if (verbose > 5) 13510 { 13511 fprintf (file, "ready list ooo attributes - sched state: %d\n", 13512 s390_sched_state); 13513 13514 for (i = last_index; i >= 0; i--) 13515 { 13516 unsigned int sched_mask; 13517 rtx_insn *insn = ready[i]; 13518 13519 if (recog_memoized (insn) < 0) 13520 continue; 13521 13522 sched_mask = s390_get_sched_attrmask (insn); 13523 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ", 13524 INSN_UID (insn), 13525 s390_sched_score (insn)); 13526#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\ 13527 ((M) & sched_mask) ? #ATTR : ""); 13528 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); 13529 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); 13530 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); 13531 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); 13532#undef PRINT_SCHED_ATTR 13533 if (s390_tune == PROCESSOR_2964_Z13) 13534 { 13535 unsigned int unit_mask, m = 1; 13536 int units, j; 13537 13538 unit_mask = s390_get_unit_mask (insn, &units); 13539 fprintf (file, "(units:"); 13540 for (j = 0; j < units; j++, m <<= 1) 13541 if (m & unit_mask) 13542 fprintf (file, " u%d", j); 13543 fprintf (file, ")"); 13544 } 13545 fprintf (file, "\n"); 13546 } 13547 } 13548 } 13549 13550 return s390_issue_rate (); 13551} 13552 13553 13554/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after 13555 the scheduler has issued INSN. It stores the last issued insn into 13556 last_scheduled_insn in order to make it available for 13557 s390_sched_reorder. */ 13558static int 13559s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more) 13560{ 13561 last_scheduled_insn = insn; 13562 13563 if ((s390_tune == PROCESSOR_2827_ZEC12 13564 || s390_tune == PROCESSOR_2964_Z13) 13565 && reload_completed 13566 && recog_memoized (insn) >= 0) 13567 { 13568 unsigned int mask = s390_get_sched_attrmask (insn); 13569 13570 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 13571 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) 13572 s390_sched_state = S390_SCHED_STATE_CRACKED; 13573 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0 13574 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) 13575 s390_sched_state = S390_SCHED_STATE_NORMAL; 13576 else 13577 { 13578 /* Only normal insns are left (mask == 0). 
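     The switch below simply counts such insns towards a full group of
     three: states 0-2 advance by one slot, a completed group
     (S390_SCHED_STATE_NORMAL) starts a fresh group at slot 1, and a
     cracked group is first closed off to S390_SCHED_STATE_NORMAL.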
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  if ((s390_tune == PROCESSOR_2827_ZEC12
       || s390_tune == PROCESSOR_2964_Z13)
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        s390_sched_state = S390_SCHED_STATE_CRACKED;
      else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
               || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        s390_sched_state = S390_SCHED_STATE_NORMAL;
      else
        {
          /* Only normal insns are left (mask == 0).  */
          switch (s390_sched_state)
            {
            case 0:
            case 1:
            case 2:
            case S390_SCHED_STATE_NORMAL:
              if (s390_sched_state == S390_SCHED_STATE_NORMAL)
                s390_sched_state = 1;
              else
                s390_sched_state++;

              break;
            case S390_SCHED_STATE_CRACKED:
              s390_sched_state = S390_SCHED_STATE_NORMAL;
              break;
            }
        }

      if (s390_tune == PROCESSOR_2964_Z13)
        {
          int units, i;
          unsigned unit_mask, m = 1;

          unit_mask = s390_get_unit_mask (insn, &units);
          gcc_assert (units <= MAX_SCHED_UNITS);

          for (i = 0; i < units; i++, m <<= 1)
            if (m & unit_mask)
              last_scheduled_unit_distance[i] = 0;
            else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
              last_scheduled_unit_distance[i]++;
        }

      if (verbose > 5)
        {
          unsigned int sched_mask;

          sched_mask = s390_get_sched_attrmask (insn);

          fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
#define PRINT_SCHED_ATTR(M, ATTR) \
  fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR

          if (s390_tune == PROCESSOR_2964_Z13)
            {
              unsigned int unit_mask, m = 1;
              int units, j;

              unit_mask = s390_get_unit_mask (insn, &units);
              fprintf (file, "(units:");
              for (j = 0; j < units; j++, m <<= 1)
                if (m & unit_mask)
                  fprintf (file, " u%d", j);
              fprintf (file, ")");
            }
          fprintf (file, " sched state: %d\n", s390_sched_state);

          if (s390_tune == PROCESSOR_2964_Z13)
            {
              int units, j;

              s390_get_unit_mask (insn, &units);

              fprintf (file, ";;\t\tBACKEND: units unused for: ");
              for (j = 0; j < units; j++)
                fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
              fprintf (file, "\n");
            }
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}

/* This function is called via hook TARGET_SCHED_INIT.  It resets the
   last-scheduled-insn bookkeeping and the group state before a new
   scheduling region is processed.  */
static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
  s390_sched_state = 0;
}
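
/* Illustrative sketch (not part of the build): the s390_sched_state
   updates in s390_sched_variable_issue above form a small state
   machine.  States 0..2 count the insns already placed into the
   current dispatch group; S390_SCHED_STATE_NORMAL and
   S390_SCHED_STATE_CRACKED are special states whose actual values are
   defined earlier in this file -- the values below are assumptions for
   the example only.  */
#if 0
#include <stdio.h>

enum { EX_STATE_NORMAL = 3, EX_STATE_CRACKED = 4 }; /* assumed values */

/* Next state after issuing a "normal" insn (attribute mask == 0),
   mirroring the else-branch above.  */
static int
ex_next_state (int state)
{
  if (state == EX_STATE_CRACKED)
    return EX_STATE_NORMAL;
  if (state == EX_STATE_NORMAL)
    return 1;               /* a new group has started */
  return state + 1;         /* 0 -> 1 -> 2 -> 3 (group full) */
}

int
main (void)
{
  int s = 0;
  for (int i = 0; i < 5; i++)
    {
      printf ("state %d\n", s);
      s = ex_next_state (s);
    }
  /* Prints: 0 1 2 3 (== EX_STATE_NORMAL) 1.  */
  return 0;
}
#endif
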
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST
   calculates the number of times LOOP should be unrolled when tuning
   for CPUs with a built-in stride prefetcher.  The loop body is
   scanned for memory accesses; depending on the loop depth and the
   number of such accesses a new unroll factor <= NUNROLL is returned
   to improve the behaviour of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune != PROCESSOR_2097_Z10
      && s390_tune != PROCESSOR_2817_Z196
      && s390_tune != PROCESSOR_2827_ZEC12
      && s390_tune != PROCESSOR_2964_Z13)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
        FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
          if (MEM_P (*iter))
            mem_count += 1;
  free (bbs);

  /* Prevent division by zero; no adjustment is needed for a loop
     without memory references.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
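
/* Illustrative sketch (not part of the build): the clamping above in
   plain numbers.  A depth-1 loop with 7 memory references allows at
   most 28 / 7 == 4 unrolled copies, so a requested factor of 8 is
   reduced to 4, while a loop with a single memory reference keeps the
   full factor (MIN (8, 28) == 8).  */
#if 0
#include <stdio.h>

#define EX_MIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirror of the depth-1 case above.  */
static unsigned
ex_unroll_depth1 (unsigned nunroll, unsigned mem_count)
{
  if (mem_count == 0)
    return nunroll;
  return EX_MIN (nunroll, 28 / mem_count);
}

int
main (void)
{
  printf ("%u\n", ex_unroll_depth1 (8, 7));  /* 4 */
  printf ("%u\n", ex_unroll_depth1 (8, 1));  /* 8 */
  return 0;
}
#endif
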
static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mhotpatch_:
            {
              int val1;
              int val2;
              char s[256];
              char *t;

              strncpy (s, opt->arg, 256);
              s[255] = 0;
              t = strchr (s, ',');
              if (t != NULL)
                {
                  *t = 0;
                  t++;
                  val1 = integral_argument (s);
                  val2 = integral_argument (t);
                }
              else
                {
                  val1 = -1;
                  val2 = -1;
                }
              if (val1 == -1 || val2 == -1)
                {
                  /* Argument is not a plain number.  */
                  error ("arguments to %qs should be non-negative integers",
                         "-mhotpatch=n,m");
                  break;
                }
              else if (val1 > s390_hotpatch_hw_max
                       || val2 > s390_hotpatch_hw_max)
                {
                  error ("argument to %qs is too large (max. %d)",
                         "-mhotpatch=n,m", s390_hotpatch_hw_max);
                  break;
                }
              s390_hotpatch_hw_before_label = val1;
              s390_hotpatch_hw_after_label = val2;
              break;
            }
          default:
            gcc_unreachable ();
          }
      }

  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  /* Architecture mode defaults according to ABI.  */
  if (!(target_flags_explicit & MASK_ZARCH))
    {
      if (TARGET_64BIT)
        target_flags |= MASK_ZARCH;
      else
        target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on the
     command line.  */
  if (s390_arch == PROCESSOR_max)
    {
      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
      s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
      s390_arch_flags = processor_flags_table[(int) s390_arch];
    }

  /* Determine the processor to tune for.  */
  if (s390_tune == PROCESSOR_max)
    {
      s390_tune = s390_arch;
      s390_tune_flags = s390_arch_flags;
    }

  /* Sanity checks.  */
  if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
    error ("z/Architecture mode not supported on %s", s390_arch_string);
  if (TARGET_64BIT && !TARGET_ZARCH)
    error ("64-bit ABI not supported in ESA/390 mode");

  /* Use hardware DFP if available and not explicitly disabled by
     user.  E.g. with -m31 -march=z10 -mzarch.  */
  if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
    target_flags |= MASK_HARD_DFP;

  /* Enable hardware transactions if available and not explicitly
     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch.  */
  if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
    target_flags |= MASK_OPT_HTM;

  if (target_flags_explicit & MASK_OPT_VX)
    {
      if (TARGET_OPT_VX)
        {
          if (!TARGET_CPU_VX)
            error ("hardware vector support not available on %s",
                   s390_arch_string);
          if (TARGET_SOFT_FLOAT)
            error ("hardware vector support not available with -msoft-float");
        }
    }
  else if (TARGET_CPU_VX)
    /* Enable vector support if available and not explicitly disabled
       by user.  E.g. with -m31 -march=z13 -mzarch.  */
    target_flags |= MASK_OPT_VX;

  if (TARGET_HARD_DFP && !TARGET_DFP)
    {
      if (target_flags_explicit & MASK_HARD_DFP)
        {
          if (!TARGET_CPU_DFP)
            error ("hardware decimal floating point instructions"
                   " not available on %s", s390_arch_string);
          if (!TARGET_ZARCH)
            error ("hardware decimal floating point instructions"
                   " not available in ESA/390 mode");
        }
      else
        target_flags &= ~MASK_HARD_DFP;
    }

  if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
    {
      if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
        error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");

      target_flags &= ~MASK_HARD_DFP;
    }

  /* Set processor cost function.  */
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
      s390_cost = &z990_cost;
      break;
    case PROCESSOR_2094_Z9_109:
      s390_cost = &z9_109_cost;
      break;
    case PROCESSOR_2097_Z10:
      s390_cost = &z10_cost;
      break;
    case PROCESSOR_2817_Z196:
      s390_cost = &z196_cost;
      break;
    case PROCESSOR_2827_ZEC12:
    case PROCESSOR_2964_Z13:
      s390_cost = &zEC12_cost;
      break;
    default:
      s390_cost = &z900_cost;
    }

  if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
    error ("-mbackchain -mpacked-stack -mhard-float are not supported "
           "in combination");

  if (s390_stack_size)
    {
      if (s390_stack_guard >= s390_stack_size)
        error ("stack size must be greater than the stack guard value");
      else if (s390_stack_size > 1 << 16)
        error ("stack size must not be greater than 64k");
    }
  else if (s390_stack_guard)
    error ("-mstack-guard implies use of -mstack-size");

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (s390_tune == PROCESSOR_2097_Z10
      || s390_tune == PROCESSOR_2817_Z196
      || s390_tune == PROCESSOR_2827_ZEC12
      || s390_tune == PROCESSOR_2964_Z13)
    {
      maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
      maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
                             global_options.x_param_values,
                             global_options_set.x_param_values);
    }

  maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* Values for loop prefetching.  */
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a small
     part of the caches above L1.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  if (TARGET_TPF)
    {
      /* Don't emit DWARF3/4 unless specifically selected.  The TPF
         debuggers do not yet support DWARF 3/4.  */
      if (!global_options_set.x_dwarf_strict)
        dwarf_strict = 1;
      if (!global_options_set.x_dwarf_version)
        dwarf_version = 2;
    }

  /* Register a target-specific optimization-and-lowering pass
     to run immediately before prologue and epilogue generation.

     Registering the pass must be done at start up.  It's
     convenient to do it here.  */
  opt_pass *new_pass = new pass_s390_early_mach (g);
  struct register_pass_info insert_pass_s390_early_mach =
    {
      new_pass,                 /* pass */
      "pro_and_epilogue",       /* reference_pass_name */
      1,                        /* ref_pass_instance_number */
      PASS_POS_INSERT_BEFORE    /* po_op */
    };
  register_pass (&insert_pass_s390_early_mach);
}
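
/* Illustrative sketch (not part of the build): the -mhotpatch=n,m
   parsing above splits the argument at the comma and converts both
   halves to integers.  ex_parse_hotpatch mirrors that logic with
   plain strtol instead of GCC's integral_argument.  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return 0 on success and store the two values, -1 on malformed
   input.  */
static int
ex_parse_hotpatch (const char *arg, int *before, int *after)
{
  char s[256];
  char *t, *end;

  strncpy (s, arg, sizeof (s) - 1);
  s[sizeof (s) - 1] = 0;

  t = strchr (s, ',');
  if (t == NULL)
    return -1;
  *t++ = 0;                     /* split at the comma */

  *before = strtol (s, &end, 10);
  if (*end != 0)
    return -1;
  *after = strtol (t, &end, 10);
  if (*end != 0)
    return -1;
  return 0;
}

int
main (void)
{
  int b, a;
  if (ex_parse_hotpatch ("1,2", &b, &a) == 0)
    printf ("before: %d after: %d\n", b, a);  /* before: 1 after: 2 */
  return 0;
}
#endif
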
/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                     unsigned int align ATTRIBUTE_UNUSED,
                                     enum by_pieces_operation op ATTRIBUTE_UNUSED,
                                     bool speed_p ATTRIBUTE_UNUSED)
{
  return (size == 1 || size == 2
          || size == 4 || (TARGET_ZARCH && size == 8));
}
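
/* Illustrative sketch (not part of the build): the predicate above
   allows the generic by-pieces code (used e.g. when expanding small
   block moves) only for block sizes matching a native access width:
   1, 2 and 4 bytes always, 8 bytes only under z/Architecture where
   64-bit operations are available.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
ex_use_by_pieces_p (unsigned long size, bool zarch)
{
  return size == 1 || size == 2 || size == 4 || (zarch && size == 8);
}

int
main (void)
{
  printf ("%d %d %d\n",
          ex_use_by_pieces_p (4, false),   /* 1 */
          ex_use_by_pieces_p (8, false),   /* 0: needs z/Architecture */
          ex_use_by_pieces_p (8, true));   /* 1 */
  return 0;
}
#endif
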
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
  tree call_efpc = build_call_expr (efpc, 0);
  tree fenv_var = create_tmp_var (unsigned_type_node);

#define FPC_EXCEPTION_MASK       HOST_WIDE_INT_UC (0xf8000000)
#define FPC_FLAGS_MASK           HOST_WIDE_INT_UC (0x00f80000)
#define FPC_DXC_MASK             HOST_WIDE_INT_UC (0x0000ff00)
#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
#define FPC_FLAGS_SHIFT          HOST_WIDE_INT_UC (16)
#define FPC_DXC_SHIFT            HOST_WIDE_INT_UC (8)

  /* Generates the equivalent of feholdexcept (&fenv_var):

     fenv_var = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var & mask)  */
  tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
  tree new_fpc =
    build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
            build_int_cst (unsigned_type_node,
                           ~(FPC_DXC_MASK | FPC_FLAGS_MASK
                             | FPC_EXCEPTION_MASK)));
  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     __builtin_s390_sfpc (__builtin_s390_efpc () & mask)  */
  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
                    build_int_cst (unsigned_type_node,
                                   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
  *clear = build_call_expr (sfpc, 1, new_fpc);

  /* Generates the equivalent of feupdateenv (fenv_var):

     old_fpc = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */

  old_fpc = create_tmp_var (unsigned_type_node);
  tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
                               old_fpc, call_efpc);

  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);

  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
                                  build_int_cst (unsigned_type_node,
                                                 FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
                             build_int_cst (unsigned_type_node,
                                            FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
                                      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            store_old_fpc, set_new_fpc),
                    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}
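
/* Illustrative sketch (not part of the build): the FPC masks above
   describe the layout of the floating-point control register; the
   flags field, for instance, occupies bits 19..23 of the 32-bit word
   (counting from the least significant bit).  Extracting a field is a
   plain mask-and-shift:  */
#if 0
#include <stdio.h>

#define EX_FPC_FLAGS_MASK  0x00f80000u
#define EX_FPC_FLAGS_SHIFT 16

int
main (void)
{
  unsigned fpc = 0x00280000u;   /* two flag bits set */
  unsigned flags = (fpc & EX_FPC_FLAGS_MASK) >> EX_FPC_FLAGS_SHIFT;

  printf ("0x%x\n", flags);     /* 0x28 */
  return 0;
}
#endif
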
/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_VX)
    switch (mode)
      {
      case DFmode:
        return V2DFmode;
      case DImode:
        return V2DImode;
      case SImode:
        return V4SImode;
      case HImode:
        return V8HImode;
      case QImode:
        return V16QImode;
      default:;
      }
  return word_mode;
}

/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
                                  const_tree type ATTRIBUTE_UNUSED,
                                  int misalignment ATTRIBUTE_UNUSED,
                                  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}

/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow this to be
   overridden by the user, although doing so definitely breaks the
   ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
}
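
/* Illustrative sketch (not part of the build): the alignment rule
   above in plain numbers.  TYPE_SIZE is in bits, so a 16-byte vector
   (128 bits) is capped at MIN (64, 128) == 64 bits == 8 bytes, while
   a hypothetical 4-byte vector (32 bits) would only need 32-bit
   alignment.  */
#if 0
#include <stdio.h>

#define EX_MIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirror of the non-user-aligned case above; SIZE is in bits.  */
static long
ex_vector_alignment_bits (long size)
{
  return EX_MIN (64L, size);
}

int
main (void)
{
  printf ("%ld\n", ex_vector_alignment_bits (128));  /* 64 */
  printf ("%ld\n", ex_vector_alignment_bits (32));   /* 32 */
  return 0;
}
#endif
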
/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);

  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
             s390_vector_abi);
#endif
  file_end_indicate_exec_stack ();
}

/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char*
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
              || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
              || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
               || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
          || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
              "bool with floating point vector operands");

  return NULL;
}
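
/* Illustrative sketch: examples of operand combinations the check
   above rejects when the z vector language extension is in use
   ("vector bool" being the zvector boolean vector type):

     vector signed int  +  vector unsigned int  -> signedness error
     vector bool int    +  vector bool int      -> no +/- on two bools
     vector bool int    *  vector signed int    -> no bool with mul/div
     vector bool long   +  vector double        -> no bool with float +/-

   A plain signed + signed addition, or adding a vector bool to an
   integer vector, is accepted and returns NULL (no diagnostic).  */
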
/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"