1/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu.
2   Copyright (C) 1994-2015 Free Software Foundation, Inc.
3
4   Sources derived from work done by Sankhya Technologies (www.sankhya.com) on
5   behalf of Synopsys Inc.
6
   Position Independent Code support added, code cleaned up,
8   Comments and Support For ARC700 instructions added by
9   Saurabh Verma (saurabh.verma@codito.com)
   Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
11
12   Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines,
13   profiling support added by Joern Rennecke <joern.rennecke@embecosm.com>
14
15This file is part of GCC.
16
17GCC is free software; you can redistribute it and/or modify
18it under the terms of the GNU General Public License as published by
19the Free Software Foundation; either version 3, or (at your option)
20any later version.
21
22GCC is distributed in the hope that it will be useful,
23but WITHOUT ANY WARRANTY; without even the implied warranty of
24MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25GNU General Public License for more details.
26
27You should have received a copy of the GNU General Public License
28along with GCC; see the file COPYING3.  If not see
29<http://www.gnu.org/licenses/>.  */
30
31#include "config.h"
32#include "system.h"
33#include "coretypes.h"
34#include "tm.h"
35#include "hash-set.h"
36#include "machmode.h"
37#include "vec.h"
38#include "double-int.h"
39#include "input.h"
40#include "alias.h"
41#include "symtab.h"
42#include "wide-int.h"
43#include "inchash.h"
44#include "tree.h"
45#include "fold-const.h"
46#include "varasm.h"
47#include "stor-layout.h"
48#include "stringpool.h"
49#include "calls.h"
50#include "rtl.h"
51#include "regs.h"
52#include "hard-reg-set.h"
53#include "real.h"
54#include "insn-config.h"
55#include "conditions.h"
56#include "insn-flags.h"
57#include "hashtab.h"
58#include "function.h"
59#include "toplev.h"
60#include "ggc.h"
61#include "tm_p.h"
62#include "target.h"
63#include "output.h"
64#include "insn-attr.h"
65#include "flags.h"
66#include "statistics.h"
67#include "fixed-value.h"
68#include "expmed.h"
69#include "dojump.h"
70#include "explow.h"
71#include "emit-rtl.h"
72#include "stmt.h"
73#include "expr.h"
74#include "recog.h"
75#include "debug.h"
76#include "diagnostic.h"
77#include "insn-codes.h"
78#include "langhooks.h"
79#include "optabs.h"
80#include "tm-constrs.h"
81#include "reload.h" /* For operands_match_p */
82#include "dominance.h"
83#include "cfg.h"
84#include "cfgrtl.h"
85#include "cfganal.h"
86#include "lcm.h"
87#include "cfgbuild.h"
88#include "cfgcleanup.h"
89#include "predict.h"
90#include "basic-block.h"
91#include "df.h"
92#include "tree-pass.h"
93#include "context.h"
94#include "pass_manager.h"
95#include "builtins.h"
96#include "rtl-iter.h"
97
98/* Which cpu we're compiling for (A5, ARC600, ARC601, ARC700).  */
99static const char *arc_cpu_string = "";
100
101/* ??? Loads can handle any constant, stores can only handle small ones.  */
102/* OTOH, LIMMs cost extra, so their usefulness is limited.  */
103#define RTX_OK_FOR_OFFSET_P(MODE, X) \
104(GET_CODE (X) == CONST_INT \
105 && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
106		     (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
107		      ? 0 \
108		      : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
109
110#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \
111(GET_CODE (X) == PLUS			     \
112  && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \
113  && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \
114       && GET_MODE_SIZE ((MODE)) <= 4) \
115      || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))
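/* For illustration (a sketch derived from the macro above, not an exhaustive
   statement of its constraints), the accepted shapes are:
     (plus (reg BASE) (reg IDX))        -- only when the INDEX argument allows
					   an index register and the access is
					   at most 4 bytes wide;
     (plus (reg BASE) (const_int OFF))  -- with OFF limited by
					   RTX_OK_FOR_OFFSET_P.  */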
116
117#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \
118(GET_CODE (X) == PLUS \
119 && GET_CODE (XEXP (X, 0)) == MULT \
120 && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \
121 && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
122 && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
123     || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
124 && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \
125     || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))
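/* For illustration (derived from the macro above): a scaled address such as
     (plus (mult (reg IDX) (const_int 4)) (reg BASE))
   is accepted for a 4-byte access, and (const_int 2) likewise for a 2-byte
   access; the second operand may also be a constant, but only a const_int
   when generating PIC.  */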
126
127#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
128  (GET_CODE (X) == PLUS \
129   && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \
130   && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \
131	&& SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \
132       || (GET_CODE (XEXP ((X), 1)) == CONST \
133	   && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \
134	   && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \
135	   && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \
136	   && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT)))
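/* For illustration (derived from the macro above), small-data addresses have
   one of the shapes
     (plus (reg SDATA_BASE_REGNUM) (symbol_ref SYM))
     (plus (reg SDATA_BASE_REGNUM)
	   (const (plus (symbol_ref SYM) (const_int OFF))))
   where SYM satisfies SYMBOL_REF_SMALL_P.  */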
137
138/* Array of valid operand punctuation characters.  */
139char arc_punct_chars[256];
140
141/* State used by arc_ccfsm_advance to implement conditional execution.  */
142struct GTY (()) arc_ccfsm
143{
144  int state;
145  int cc;
146  rtx cond;
147  rtx_insn *target_insn;
148  int target_label;
149};
150
151#define arc_ccfsm_current cfun->machine->ccfsm_current
152
153#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
154  ((STATE)->state == 1 || (STATE)->state == 2)
155
156/* Indicate we're conditionalizing insns now.  */
157#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
158  ((STATE)->state += 2)
159
160#define ARC_CCFSM_COND_EXEC_P(STATE) \
161  ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \
162   || current_insn_predicate)
163
164/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE.  */
165#define CCFSM_ISCOMPACT(INSN,STATE) \
166  (ARC_CCFSM_COND_EXEC_P (STATE) \
167   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
168      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
169   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
170
171/* Likewise, but also consider that INSN might be in a delay slot of JUMP.  */
172#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
173  ((ARC_CCFSM_COND_EXEC_P (STATE) \
174    || (JUMP_P (JUMP) \
175	&& INSN_ANNULLED_BRANCH_P (JUMP) \
176	&& (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
177   ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
178      || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
179   : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
180
181/* The maximum number of insns skipped which will be conditionalised if
182   possible.  */
183/* When optimizing for speed:
184    Let p be the probability that the potentially skipped insns need to
185    be executed, pn the cost of a correctly predicted non-taken branch,
186    mt the cost of a mis/non-predicted taken branch,
187    mn mispredicted non-taken, pt correctly predicted taken ;
188    costs expressed in numbers of instructions like the ones considered
189    skipping.
190    Unfortunately we don't have a measure of predictability - this
191    is linked to probability only in that in the no-eviction-scenario
192    there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
193    value that can be assumed *if* the distribution is perfectly random.
    A predictability of 1 is perfectly plausible no matter what p is,
195    because the decision could be dependent on an invocation parameter
196    of the program.
197    For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
198    For small p, we want MAX_INSNS_SKIPPED == pt
199
200   When optimizing for size:
    We want to skip insns unless we could use 16-bit opcodes for the
    non-conditionalized insns to balance the branch length or more.
    Performance can be a tie-breaker.  */
/* If the potentially-skipped insns are likely to be executed, we'll
   generally save one non-taken branch, so we want MAX_INSNS_SKIPPED
   to be no less than 1/p.  */
208#define MAX_INSNS_SKIPPED 3
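/* For illustration, with hypothetical costs pn = 1 and mt = 2 and a
   probability p = 0.5, the speed formula above gives
   1 / (1 - 0.5) + 2 - 1 = 3, matching the value just defined.  */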
209
210/* The values of unspec's first field.  */
211enum {
212  ARC_UNSPEC_PLT = 3,
213  ARC_UNSPEC_GOT,
214  ARC_UNSPEC_GOTOFF
215} ;
216
217
218enum arc_builtins {
219  ARC_BUILTIN_NOP        =    2,
220  ARC_BUILTIN_NORM       =    3,
221  ARC_BUILTIN_NORMW      =    4,
222  ARC_BUILTIN_SWAP       =    5,
223  ARC_BUILTIN_BRK        =    6,
224  ARC_BUILTIN_DIVAW      =    7,
225  ARC_BUILTIN_EX         =    8,
226  ARC_BUILTIN_MUL64      =    9,
227  ARC_BUILTIN_MULU64     =   10,
228  ARC_BUILTIN_RTIE       =   11,
229  ARC_BUILTIN_SYNC       =   12,
230  ARC_BUILTIN_CORE_READ  =   13,
231  ARC_BUILTIN_CORE_WRITE =   14,
232  ARC_BUILTIN_FLAG       =   15,
233  ARC_BUILTIN_LR         =   16,
234  ARC_BUILTIN_SR         =   17,
235  ARC_BUILTIN_SLEEP      =   18,
236  ARC_BUILTIN_SWI        =   19,
237  ARC_BUILTIN_TRAP_S     =   20,
238  ARC_BUILTIN_UNIMP_S    =   21,
239  ARC_BUILTIN_ALIGNED    =   22,
240
241  /* Sentinel to mark start of simd builtins.  */
242  ARC_SIMD_BUILTIN_BEGIN      = 1000,
243
244  ARC_SIMD_BUILTIN_VADDAW     = 1001,
245  ARC_SIMD_BUILTIN_VADDW      = 1002,
246  ARC_SIMD_BUILTIN_VAVB       = 1003,
247  ARC_SIMD_BUILTIN_VAVRB      = 1004,
248  ARC_SIMD_BUILTIN_VDIFAW     = 1005,
249  ARC_SIMD_BUILTIN_VDIFW      = 1006,
250  ARC_SIMD_BUILTIN_VMAXAW     = 1007,
251  ARC_SIMD_BUILTIN_VMAXW      = 1008,
252  ARC_SIMD_BUILTIN_VMINAW     = 1009,
253  ARC_SIMD_BUILTIN_VMINW      = 1010,
254  ARC_SIMD_BUILTIN_VMULAW     = 1011,
255  ARC_SIMD_BUILTIN_VMULFAW    = 1012,
256  ARC_SIMD_BUILTIN_VMULFW     = 1013,
257  ARC_SIMD_BUILTIN_VMULW      = 1014,
258  ARC_SIMD_BUILTIN_VSUBAW     = 1015,
259  ARC_SIMD_BUILTIN_VSUBW      = 1016,
260  ARC_SIMD_BUILTIN_VSUMMW     = 1017,
261  ARC_SIMD_BUILTIN_VAND       = 1018,
262  ARC_SIMD_BUILTIN_VANDAW     = 1019,
263  ARC_SIMD_BUILTIN_VBIC       = 1020,
264  ARC_SIMD_BUILTIN_VBICAW     = 1021,
265  ARC_SIMD_BUILTIN_VOR        = 1022,
266  ARC_SIMD_BUILTIN_VXOR       = 1023,
267  ARC_SIMD_BUILTIN_VXORAW     = 1024,
268  ARC_SIMD_BUILTIN_VEQW       = 1025,
269  ARC_SIMD_BUILTIN_VLEW       = 1026,
270  ARC_SIMD_BUILTIN_VLTW       = 1027,
271  ARC_SIMD_BUILTIN_VNEW       = 1028,
272  ARC_SIMD_BUILTIN_VMR1AW     = 1029,
273  ARC_SIMD_BUILTIN_VMR1W      = 1030,
274  ARC_SIMD_BUILTIN_VMR2AW     = 1031,
275  ARC_SIMD_BUILTIN_VMR2W      = 1032,
276  ARC_SIMD_BUILTIN_VMR3AW     = 1033,
277  ARC_SIMD_BUILTIN_VMR3W      = 1034,
278  ARC_SIMD_BUILTIN_VMR4AW     = 1035,
279  ARC_SIMD_BUILTIN_VMR4W      = 1036,
280  ARC_SIMD_BUILTIN_VMR5AW     = 1037,
281  ARC_SIMD_BUILTIN_VMR5W      = 1038,
282  ARC_SIMD_BUILTIN_VMR6AW     = 1039,
283  ARC_SIMD_BUILTIN_VMR6W      = 1040,
284  ARC_SIMD_BUILTIN_VMR7AW     = 1041,
285  ARC_SIMD_BUILTIN_VMR7W      = 1042,
286  ARC_SIMD_BUILTIN_VMRB       = 1043,
287  ARC_SIMD_BUILTIN_VH264F     = 1044,
288  ARC_SIMD_BUILTIN_VH264FT    = 1045,
289  ARC_SIMD_BUILTIN_VH264FW    = 1046,
290  ARC_SIMD_BUILTIN_VVC1F      = 1047,
291  ARC_SIMD_BUILTIN_VVC1FT     = 1048,
292
293  /* Va, Vb, rlimm instructions.  */
294  ARC_SIMD_BUILTIN_VBADDW     = 1050,
295  ARC_SIMD_BUILTIN_VBMAXW     = 1051,
296  ARC_SIMD_BUILTIN_VBMINW     = 1052,
297  ARC_SIMD_BUILTIN_VBMULAW    = 1053,
298  ARC_SIMD_BUILTIN_VBMULFW    = 1054,
299  ARC_SIMD_BUILTIN_VBMULW     = 1055,
300  ARC_SIMD_BUILTIN_VBRSUBW    = 1056,
301  ARC_SIMD_BUILTIN_VBSUBW     = 1057,
302
303  /* Va, Vb, Ic instructions.  */
304  ARC_SIMD_BUILTIN_VASRW      = 1060,
305  ARC_SIMD_BUILTIN_VSR8       = 1061,
306  ARC_SIMD_BUILTIN_VSR8AW     = 1062,
307
308  /* Va, Vb, u6 instructions.  */
309  ARC_SIMD_BUILTIN_VASRRWi    = 1065,
310  ARC_SIMD_BUILTIN_VASRSRWi   = 1066,
311  ARC_SIMD_BUILTIN_VASRWi     = 1067,
312  ARC_SIMD_BUILTIN_VASRPWBi   = 1068,
313  ARC_SIMD_BUILTIN_VASRRPWBi  = 1069,
314  ARC_SIMD_BUILTIN_VSR8AWi    = 1070,
315  ARC_SIMD_BUILTIN_VSR8i      = 1071,
316
317  /* Va, Vb, u8 (simm) instructions.  */
318  ARC_SIMD_BUILTIN_VMVAW      = 1075,
319  ARC_SIMD_BUILTIN_VMVW       = 1076,
320  ARC_SIMD_BUILTIN_VMVZW      = 1077,
321  ARC_SIMD_BUILTIN_VD6TAPF    = 1078,
322
323  /* Va, rlimm, u8 (simm) instructions.  */
324  ARC_SIMD_BUILTIN_VMOVAW     = 1080,
325  ARC_SIMD_BUILTIN_VMOVW      = 1081,
326  ARC_SIMD_BUILTIN_VMOVZW     = 1082,
327
328  /* Va, Vb instructions.  */
329  ARC_SIMD_BUILTIN_VABSAW     = 1085,
330  ARC_SIMD_BUILTIN_VABSW      = 1086,
331  ARC_SIMD_BUILTIN_VADDSUW    = 1087,
332  ARC_SIMD_BUILTIN_VSIGNW     = 1088,
333  ARC_SIMD_BUILTIN_VEXCH1     = 1089,
334  ARC_SIMD_BUILTIN_VEXCH2     = 1090,
335  ARC_SIMD_BUILTIN_VEXCH4     = 1091,
336  ARC_SIMD_BUILTIN_VUPBAW     = 1092,
337  ARC_SIMD_BUILTIN_VUPBW      = 1093,
338  ARC_SIMD_BUILTIN_VUPSBAW    = 1094,
339  ARC_SIMD_BUILTIN_VUPSBW     = 1095,
340
341  ARC_SIMD_BUILTIN_VDIRUN     = 1100,
342  ARC_SIMD_BUILTIN_VDORUN     = 1101,
343  ARC_SIMD_BUILTIN_VDIWR      = 1102,
344  ARC_SIMD_BUILTIN_VDOWR      = 1103,
345
346  ARC_SIMD_BUILTIN_VREC       = 1105,
347  ARC_SIMD_BUILTIN_VRUN       = 1106,
348  ARC_SIMD_BUILTIN_VRECRUN    = 1107,
349  ARC_SIMD_BUILTIN_VENDREC    = 1108,
350
351  ARC_SIMD_BUILTIN_VLD32WH    = 1110,
352  ARC_SIMD_BUILTIN_VLD32WL    = 1111,
353  ARC_SIMD_BUILTIN_VLD64      = 1112,
354  ARC_SIMD_BUILTIN_VLD32      = 1113,
355  ARC_SIMD_BUILTIN_VLD64W     = 1114,
356  ARC_SIMD_BUILTIN_VLD128     = 1115,
357  ARC_SIMD_BUILTIN_VST128     = 1116,
358  ARC_SIMD_BUILTIN_VST64      = 1117,
359
360  ARC_SIMD_BUILTIN_VST16_N    = 1120,
361  ARC_SIMD_BUILTIN_VST32_N    = 1121,
362
363  ARC_SIMD_BUILTIN_VINTI      = 1201,
364
365  ARC_SIMD_BUILTIN_END
366};
367
368/* A nop is needed between a 4 byte insn that sets the condition codes and
369   a branch that uses them (the same isn't true for an 8 byte insn that sets
370   the condition codes).  Set by arc_ccfsm_advance.  Used by
371   arc_print_operand.  */
372
373static int get_arc_condition_code (rtx);
374
375static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
376
/* Machine-specific attributes supported by the ARC back end.  */
379const struct attribute_spec arc_attribute_table[] =
380{
381 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
382      affects_type_identity } */
383  { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true },
384  /* Function calls made to this symbol must be done indirectly, because
385     it may lie outside of the 21/25 bit addressing range of a normal function
386     call.  */
387  { "long_call",    0, 0, false, true,  true,  NULL, false },
388  /* Whereas these functions are always known to reside within the 25 bit
389     addressing range of unconditionalized bl.  */
390  { "medium_call",   0, 0, false, true,  true,  NULL, false },
391  /* And these functions are always known to reside within the 21 bit
392     addressing range of blcc.  */
393  { "short_call",   0, 0, false, true,  true,  NULL, false },
394  { NULL, 0, 0, false, false, false, NULL, false }
395};
396static int arc_comp_type_attributes (const_tree, const_tree);
397static void arc_file_start (void);
398static void arc_internal_label (FILE *, const char *, unsigned long);
399static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
400				 tree);
401static int arc_address_cost (rtx, machine_mode, addr_space_t, bool);
402static void arc_encode_section_info (tree decl, rtx rtl, int first);
403
404static void arc_init_builtins (void);
405static rtx arc_expand_builtin (tree, rtx, rtx, machine_mode, int);
406
407static int branch_dest (rtx);
408
409static void  arc_output_pic_addr_const (FILE *,  rtx, int);
410void emit_pic_move (rtx *, machine_mode);
411bool arc_legitimate_pic_operand_p (rtx);
412static bool arc_function_ok_for_sibcall (tree, tree);
413static rtx arc_function_value (const_tree, const_tree, bool);
414const char * output_shift (rtx *);
415static void arc_reorg (void);
416static bool arc_in_small_data_p (const_tree);
417
418static void arc_init_reg_tables (void);
419static bool arc_return_in_memory (const_tree, const_tree);
420static void arc_init_simd_builtins (void);
421static bool arc_vector_mode_supported_p (machine_mode);
422
423static bool arc_can_use_doloop_p (const widest_int &, const widest_int &,
424				  unsigned int, bool);
425static const char *arc_invalid_within_doloop (const rtx_insn *);
426
427static void output_short_suffix (FILE *file);
428
429static bool arc_frame_pointer_required (void);
430
431static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
432						unsigned int,
433						enum by_pieces_operation op,
434						bool);
435
436/* Implements target hook vector_mode_supported_p.  */
437
438static bool
439arc_vector_mode_supported_p (machine_mode mode)
440{
441  if (!TARGET_SIMD_SET)
442    return false;
443
444  if ((mode == V4SImode)
445      || (mode == V8HImode))
446    return true;
447
448  return false;
449}
450
451
452/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
453static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
454static rtx arc_delegitimize_address (rtx);
455static bool arc_can_follow_jump (const rtx_insn *follower,
456				 const rtx_insn *followee);
457
458static rtx frame_insn (rtx);
459static void arc_function_arg_advance (cumulative_args_t, machine_mode,
460				      const_tree, bool);
461static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
462
463static void arc_finalize_pic (void);
464
/* Initialize the GCC target structure.  */
466#undef  TARGET_COMP_TYPE_ATTRIBUTES
467#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
468#undef TARGET_ASM_FILE_START
469#define TARGET_ASM_FILE_START arc_file_start
470#undef TARGET_ATTRIBUTE_TABLE
471#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
472#undef TARGET_ASM_INTERNAL_LABEL
473#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
474#undef TARGET_RTX_COSTS
475#define TARGET_RTX_COSTS arc_rtx_costs
476#undef TARGET_ADDRESS_COST
477#define TARGET_ADDRESS_COST arc_address_cost
478
479#undef TARGET_ENCODE_SECTION_INFO
480#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
481
482#undef TARGET_CANNOT_FORCE_CONST_MEM
483#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
484
485#undef  TARGET_INIT_BUILTINS
486#define TARGET_INIT_BUILTINS  arc_init_builtins
487
488#undef  TARGET_EXPAND_BUILTIN
489#define TARGET_EXPAND_BUILTIN arc_expand_builtin
490
491#undef  TARGET_ASM_OUTPUT_MI_THUNK
492#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
493
494#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
495#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
496
497#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
498#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
499
500#undef  TARGET_MACHINE_DEPENDENT_REORG
501#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
502
503#undef TARGET_IN_SMALL_DATA_P
504#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
505
506#undef TARGET_PROMOTE_FUNCTION_MODE
507#define TARGET_PROMOTE_FUNCTION_MODE \
508  default_promote_function_mode_always_promote
509
510#undef TARGET_PROMOTE_PROTOTYPES
511#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
512
513#undef TARGET_RETURN_IN_MEMORY
514#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
515#undef TARGET_PASS_BY_REFERENCE
516#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
517
518#undef TARGET_SETUP_INCOMING_VARARGS
519#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
520
521#undef TARGET_ARG_PARTIAL_BYTES
522#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
523
524#undef TARGET_MUST_PASS_IN_STACK
525#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
526
527#undef TARGET_FUNCTION_VALUE
528#define TARGET_FUNCTION_VALUE arc_function_value
529
530#undef  TARGET_SCHED_ADJUST_PRIORITY
531#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
532
533#undef TARGET_VECTOR_MODE_SUPPORTED_P
534#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
535
536#undef TARGET_CAN_USE_DOLOOP_P
537#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
538
539#undef TARGET_INVALID_WITHIN_DOLOOP
540#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
541
542#undef TARGET_PRESERVE_RELOAD_P
543#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
544
545#undef TARGET_CAN_FOLLOW_JUMP
546#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
547
548#undef TARGET_DELEGITIMIZE_ADDRESS
549#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
550
551#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
552#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
553  arc_use_by_pieces_infrastructure_p
554
555/* Usually, we will be able to scale anchor offsets.
556   When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
557#undef TARGET_MIN_ANCHOR_OFFSET
558#define TARGET_MIN_ANCHOR_OFFSET (-1024)
559#undef TARGET_MAX_ANCHOR_OFFSET
560#define TARGET_MAX_ANCHOR_OFFSET (1020)
561
562#undef TARGET_SECONDARY_RELOAD
563#define TARGET_SECONDARY_RELOAD arc_secondary_reload
564
565#define TARGET_OPTION_OVERRIDE arc_override_options
566
567#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
568
569#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
570
571#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
572
573#define TARGET_CAN_ELIMINATE arc_can_eliminate
574
575#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
576
577#define TARGET_FUNCTION_ARG arc_function_arg
578
579#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
580
581#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p
582
583#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p
584
585#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p
586
587#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address
588
589#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length
590
591#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters
592
593#undef TARGET_LRA_P
594#define TARGET_LRA_P arc_lra_p
595#define TARGET_REGISTER_PRIORITY arc_register_priority
596/* Stores with scaled offsets have different displacement ranges.  */
597#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
598#define TARGET_SPILL_CLASS arc_spill_class
599
600#include "target-def.h"
601
602#undef TARGET_ASM_ALIGNED_HI_OP
603#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
604#undef TARGET_ASM_ALIGNED_SI_OP
605#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
606
607/* Try to keep the (mov:DF _, reg) as early as possible so
608   that the d<add/sub/mul>h-lr insns appear together and can
609   use the peephole2 pattern.  */
610
611static int
612arc_sched_adjust_priority (rtx_insn *insn, int priority)
613{
614  rtx set = single_set (insn);
615  if (set
616      && GET_MODE (SET_SRC(set)) == DFmode
617      && GET_CODE (SET_SRC(set)) == REG)
618    {
619      /* Incrementing priority by 20 (empirically derived).  */
620      return priority + 20;
621    }
622
623  return priority;
624}
625
626static reg_class_t
627arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, machine_mode,
628		      secondary_reload_info *)
629{
630  if (cl == DOUBLE_REGS)
631    return GENERAL_REGS;
632
633  /* The loop counter register can be stored, but not loaded directly.  */
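  /* E.g. (an illustrative case) reloading a value from memory into lp_count
     requires a GENERAL_REGS intermediate, which the return value below asks
     reload to provide.  */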
634  if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS)
635      && in_p && MEM_P (x))
636    return GENERAL_REGS;
637  return NO_REGS;
638}
639
640static unsigned arc_ifcvt (void);
641
642namespace {
643
644const pass_data pass_data_arc_ifcvt =
645{
646  RTL_PASS,
647  "arc_ifcvt",				/* name */
648  OPTGROUP_NONE,			/* optinfo_flags */
649  TV_IFCVT2,				/* tv_id */
650  0,					/* properties_required */
651  0,					/* properties_provided */
652  0,					/* properties_destroyed */
653  0,					/* todo_flags_start */
654  TODO_df_finish			/* todo_flags_finish */
655};
656
657class pass_arc_ifcvt : public rtl_opt_pass
658{
659public:
660  pass_arc_ifcvt(gcc::context *ctxt)
661  : rtl_opt_pass(pass_data_arc_ifcvt, ctxt)
662  {}
663
664  /* opt_pass methods: */
665  opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); }
666  virtual unsigned int execute (function *) { return arc_ifcvt (); }
667};
668
669} // anon namespace
670
671rtl_opt_pass *
672make_pass_arc_ifcvt (gcc::context *ctxt)
673{
674  return new pass_arc_ifcvt (ctxt);
675}
676
677static unsigned arc_predicate_delay_insns (void);
678
679namespace {
680
681const pass_data pass_data_arc_predicate_delay_insns =
682{
683  RTL_PASS,
684  "arc_predicate_delay_insns",		/* name */
685  OPTGROUP_NONE,			/* optinfo_flags */
686  TV_IFCVT2,				/* tv_id */
687  0,					/* properties_required */
688  0,					/* properties_provided */
689  0,					/* properties_destroyed */
690  0,					/* todo_flags_start */
691  TODO_df_finish			/* todo_flags_finish */
692};
693
694class pass_arc_predicate_delay_insns : public rtl_opt_pass
695{
696public:
697  pass_arc_predicate_delay_insns(gcc::context *ctxt)
698  : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt)
699  {}
700
701  /* opt_pass methods: */
702  virtual unsigned int execute (function *)
703    {
704      return arc_predicate_delay_insns ();
705    }
706};
707
708} // anon namespace
709
710rtl_opt_pass *
711make_pass_arc_predicate_delay_insns (gcc::context *ctxt)
712{
713  return new pass_arc_predicate_delay_insns (ctxt);
714}
715
716/* Called by OVERRIDE_OPTIONS to initialize various things.  */
717
718void
719arc_init (void)
720{
721  enum attr_tune tune_dflt = TUNE_NONE;
722
723  if (TARGET_A5)
724    {
725      arc_cpu_string = "A5";
726    }
727  else if (TARGET_ARC600)
728    {
729      arc_cpu_string = "ARC600";
730      tune_dflt = TUNE_ARC600;
731    }
732  else if (TARGET_ARC601)
733    {
734      arc_cpu_string = "ARC601";
735      tune_dflt = TUNE_ARC600;
736    }
737  else if (TARGET_ARC700)
738    {
739      arc_cpu_string = "ARC700";
740      tune_dflt = TUNE_ARC700_4_2_STD;
741    }
742  else
743    gcc_unreachable ();
744  if (arc_tune == TUNE_NONE)
745    arc_tune = tune_dflt;
746  /* Note: arc_multcost is only used in rtx_cost if speed is true.  */
747  if (arc_multcost < 0)
748    switch (arc_tune)
749      {
750      case TUNE_ARC700_4_2_STD:
751	/* latency 7;
752	   max throughput (1 multiply + 4 other insns) / 5 cycles.  */
753	arc_multcost = COSTS_N_INSNS (4);
754	if (TARGET_NOMPY_SET)
755	  arc_multcost = COSTS_N_INSNS (30);
756	break;
757      case TUNE_ARC700_4_2_XMAC:
758	/* latency 5;
759	   max throughput (1 multiply + 2 other insns) / 3 cycles.  */
760	arc_multcost = COSTS_N_INSNS (3);
761	if (TARGET_NOMPY_SET)
762	  arc_multcost = COSTS_N_INSNS (30);
763	break;
764      case TUNE_ARC600:
765	if (TARGET_MUL64_SET)
766	  {
767	    arc_multcost = COSTS_N_INSNS (4);
768	    break;
769	  }
770	/* Fall through.  */
771      default:
772	arc_multcost = COSTS_N_INSNS (30);
773	break;
774      }
775
776  /* Support mul64 generation only for A5 and ARC600.  */
777  if (TARGET_MUL64_SET && TARGET_ARC700)
778      error ("-mmul64 not supported for ARC700");
779
780  /* MPY instructions valid only for ARC700.  */
781  if (TARGET_NOMPY_SET && !TARGET_ARC700)
782      error ("-mno-mpy supported only for ARC700");
783
784  /* mul/mac instructions only for ARC600.  */
785  if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601))
786      error ("-mmul32x16 supported only for ARC600 or ARC601");
787
788  if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR)
789      error ("-mno-dpfp-lrsr supported only with -mdpfp");
790
791  /* FPX-1. No fast and compact together.  */
792  if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
793      || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
794    error ("FPX fast and compact options cannot be specified together");
795
796  /* FPX-2. No fast-spfp for arc600 or arc601.  */
797  if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601))
798    error ("-mspfp_fast not available on ARC600 or ARC601");
799
800  /* FPX-3. No FPX extensions on pre-ARC600 cores.  */
801  if ((TARGET_DPFP || TARGET_SPFP)
802      && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700))
803    error ("FPX extensions not available on pre-ARC600 cores");
804
805  /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic.  */
806  if (flag_pic && !TARGET_ARC700)
807    {
      warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only.", arc_cpu_string);
809      flag_pic = 0;
810    }
811
812  arc_init_reg_tables ();
813
814  /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
815  memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
816  arc_punct_chars['#'] = 1;
817  arc_punct_chars['*'] = 1;
818  arc_punct_chars['?'] = 1;
819  arc_punct_chars['!'] = 1;
820  arc_punct_chars['^'] = 1;
821  arc_punct_chars['&'] = 1;
822
823  if (optimize > 1 && !TARGET_NO_COND_EXEC)
824    {
825      /* There are two target-independent ifcvt passes, and arc_reorg may do
826	 one or more arc_ifcvt calls.  */
827      opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g);
828      struct register_pass_info arc_ifcvt4_info
829	= { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER };
830      struct register_pass_info arc_ifcvt5_info
831	= { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE };
832
833      register_pass (&arc_ifcvt4_info);
834      register_pass (&arc_ifcvt5_info);
835    }
836
837  if (flag_delayed_branch)
838    {
839      opt_pass *pass_arc_predicate_delay_insns
840	= make_pass_arc_predicate_delay_insns (g);
841      struct register_pass_info arc_predicate_delay_info
842	= { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER };
843
844      register_pass (&arc_predicate_delay_info);
845    }
846}
847
848/* Check ARC options, generate derived target attributes.  */
849
850static void
851arc_override_options (void)
852{
853  if (arc_cpu == PROCESSOR_NONE)
854    arc_cpu = PROCESSOR_ARC700;
855
856  if (arc_size_opt_level == 3)
857    optimize_size = 1;
858
859  if (flag_pic)
860    target_flags |= MASK_NO_SDATA_SET;
861
862  if (flag_no_common == 255)
863    flag_no_common = !TARGET_NO_SDATA_SET;
864
  /* TARGET_COMPACT_CASESI needs the "q" register class.  */
866  if (TARGET_MIXED_CODE)
867    TARGET_Q_CLASS = 1;
868  if (!TARGET_Q_CLASS)
869    TARGET_COMPACT_CASESI = 0;
870  if (TARGET_COMPACT_CASESI)
871    TARGET_CASE_VECTOR_PC_RELATIVE = 1;
872
873  /* These need to be done at start up.  It's convenient to do them here.  */
874  arc_init ();
875}
876
877/* The condition codes of the ARC, and the inverse function.  */
/* For short branches, the "c" / "nc" names are not defined in the ARC
   Programmer's manual, so we have to use "lo" / "hs" instead.  */
880static const char *arc_condition_codes[] =
881{
882  "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
883  "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
884};
885
886enum arc_cc_code_index
887{
888  ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
889  ARC_CC_C,  ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
890  ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
891  ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
892};
893
894#define ARC_INVERSE_CONDITION_CODE(X)  ((X) ^ 1)
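/* For example, ARC_CC_EQ ^ 1 == ARC_CC_NE and ARC_CC_GT ^ 1 == ARC_CC_LE:
   the condition codes are laid out in complementary pairs (eq/ne, lo/hs,
   gt/le, ...), so inverting a condition only flips the low bit of its
   index.  */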
895
896/* Returns the index of the ARC condition code string in
897   `arc_condition_codes'.  COMPARISON should be an rtx like
898   `(eq (...) (...))'.  */
899
900static int
901get_arc_condition_code (rtx comparison)
902{
903  switch (GET_MODE (XEXP (comparison, 0)))
904    {
905    case CCmode:
906    case SImode: /* For BRcc.  */
907      switch (GET_CODE (comparison))
908	{
909	case EQ : return ARC_CC_EQ;
910	case NE : return ARC_CC_NE;
911	case GT : return ARC_CC_GT;
912	case LE : return ARC_CC_LE;
913	case GE : return ARC_CC_GE;
914	case LT : return ARC_CC_LT;
915	case GTU : return ARC_CC_HI;
916	case LEU : return ARC_CC_LS;
917	case LTU : return ARC_CC_LO;
918	case GEU : return ARC_CC_HS;
919	default : gcc_unreachable ();
920	}
921    case CC_ZNmode:
922      switch (GET_CODE (comparison))
923	{
924	case EQ : return ARC_CC_EQ;
925	case NE : return ARC_CC_NE;
926	case GE: return ARC_CC_P;
927	case LT: return ARC_CC_N;
928	case GT : return ARC_CC_PNZ;
929	default : gcc_unreachable ();
930	}
931    case CC_Zmode:
932      switch (GET_CODE (comparison))
933	{
934	case EQ : return ARC_CC_EQ;
935	case NE : return ARC_CC_NE;
936	default : gcc_unreachable ();
937	}
938    case CC_Cmode:
939      switch (GET_CODE (comparison))
940	{
941	case LTU : return ARC_CC_C;
942	case GEU : return ARC_CC_NC;
943	default : gcc_unreachable ();
944	}
945    case CC_FP_GTmode:
946      if (TARGET_ARGONAUT_SET && TARGET_SPFP)
947	switch (GET_CODE (comparison))
948	  {
949	  case GT  : return ARC_CC_N;
950	  case UNLE: return ARC_CC_P;
951	  default : gcc_unreachable ();
952	}
953      else
954	switch (GET_CODE (comparison))
955	  {
956	  case GT   : return ARC_CC_HI;
957	  case UNLE : return ARC_CC_LS;
958	  default : gcc_unreachable ();
959	}
960    case CC_FP_GEmode:
961      /* Same for FPX and non-FPX.  */
962      switch (GET_CODE (comparison))
963	{
964	case GE   : return ARC_CC_HS;
965	case UNLT : return ARC_CC_LO;
966	default : gcc_unreachable ();
967	}
968    case CC_FP_UNEQmode:
969      switch (GET_CODE (comparison))
970	{
971	case UNEQ : return ARC_CC_EQ;
972	case LTGT : return ARC_CC_NE;
973	default : gcc_unreachable ();
974	}
975    case CC_FP_ORDmode:
976      switch (GET_CODE (comparison))
977	{
978	case UNORDERED : return ARC_CC_C;
979	case ORDERED   : return ARC_CC_NC;
980	default : gcc_unreachable ();
981	}
982    case CC_FPXmode:
983      switch (GET_CODE (comparison))
984	{
985	case EQ        : return ARC_CC_EQ;
986	case NE        : return ARC_CC_NE;
987	case UNORDERED : return ARC_CC_C;
988	case ORDERED   : return ARC_CC_NC;
989	case LTGT      : return ARC_CC_HI;
990	case UNEQ      : return ARC_CC_LS;
991	default : gcc_unreachable ();
992	}
993    default : gcc_unreachable ();
994    }
995  /*NOTREACHED*/
996  return (42);
997}
998
/* Return true if COMPARISON has a short form that can accommodate OFFSET.  */
1000
1001bool
1002arc_short_comparison_p (rtx comparison, int offset)
1003{
1004  gcc_assert (ARC_CC_NC == ARC_CC_HS);
1005  gcc_assert (ARC_CC_C == ARC_CC_LO);
1006  switch (get_arc_condition_code (comparison))
1007    {
1008    case ARC_CC_EQ: case ARC_CC_NE:
1009      return offset >= -512 && offset <= 506;
1010    case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT:
1011    case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS:
1012      return offset >= -64 && offset <= 58;
1013    default:
1014      return false;
1015    }
1016}
1017
1018/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
1019   return the mode to be used for the comparison.  */
1020
1021machine_mode
1022arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
1023{
1024  machine_mode mode = GET_MODE (x);
1025  rtx x1;
1026
1027  /* For an operation that sets the condition codes as a side-effect, the
     C and V flags are not set as for cmp, so we can only use comparisons where
1029     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
1030     instead.)  */
1031  /* ??? We could use "pnz" for greater than zero, however, we could then
1032     get into trouble because the comparison could not be reversed.  */
1033  if (GET_MODE_CLASS (mode) == MODE_INT
1034      && y == const0_rtx
1035      && (op == EQ || op == NE
1036	  || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
1037    return CC_ZNmode;
1038
1039  /* add.f for if (a+b) */
1040  if (mode == SImode
1041      && GET_CODE (y) == NEG
1042      && (op == EQ || op == NE))
1043    return CC_ZNmode;
1044
1045  /* Check if this is a test suitable for bxor.f .  */
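  /* For example, a compare of a register against 8 (a single bit) satisfies
     the power-of-two test below, while a compare against 12 does not.  */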
1046  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1047      && ((INTVAL (y) - 1) & INTVAL (y)) == 0
1048      && INTVAL (y))
1049    return CC_Zmode;
1050
1051  /* Check if this is a test suitable for add / bmsk.f .  */
1052  if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
1053      && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
1054      && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
1055      && (~INTVAL (x1) | INTVAL (y)) < 0
1056      && (~INTVAL (x1) | INTVAL (y)) > -0x800)
1057    return CC_Zmode;
1058
1059  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
1060      && GET_CODE (x) == PLUS
1061      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
1062    return CC_Cmode;
1063
1064  if (TARGET_ARGONAUT_SET
1065      && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP)))
1066    switch (op)
1067      {
1068      case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1069	return CC_FPXmode;
1070      case LT: case UNGE: case GT: case UNLE:
1071	return CC_FP_GTmode;
1072      case LE: case UNGT: case GE: case UNLT:
1073	return CC_FP_GEmode;
1074      default: gcc_unreachable ();
1075      }
1076  else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
1077    switch (op)
1078      {
1079      case EQ: case NE: return CC_Zmode;
1080      case LT: case UNGE:
1081      case GT: case UNLE: return CC_FP_GTmode;
1082      case LE: case UNGT:
1083      case GE: case UNLT: return CC_FP_GEmode;
1084      case UNEQ: case LTGT: return CC_FP_UNEQmode;
1085      case ORDERED: case UNORDERED: return CC_FP_ORDmode;
1086      default: gcc_unreachable ();
1087      }
1088
1089  return CCmode;
1090}
1091
1092/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are (or may be) more than 32 modes now.  Instead we use two tables: one
1095   indexed by hard register number, and one indexed by mode.  */
1096
1097/* The purpose of arc_mode_class is to shrink the range of modes so that
1098   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
1099   mapped into one arc_mode_class mode.  */
1100
1101enum arc_mode_class {
1102  C_MODE,
1103  S_MODE, D_MODE, T_MODE, O_MODE,
1104  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
1105  V_MODE
1106};
1107
1108/* Modes for condition codes.  */
1109#define C_MODES (1 << (int) C_MODE)
1110
1111/* Modes for single-word and smaller quantities.  */
1112#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
1113
1114/* Modes for double-word and smaller quantities.  */
1115#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
1116
1117/* Mode for 8-byte DF values only.  */
1118#define DF_MODES (1 << DF_MODE)
1119
1120/* Modes for quad-word and smaller quantities.  */
1121#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
1122
1123/* Modes for 128-bit vectors.  */
1124#define V_MODES (1 << (int) V_MODE)
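/* For illustration, with the enum above S_MODES evaluates to 0x22,
   D_MODES to 0x66 and T_MODES to 0xee; a T_MODES entry in
   arc_hard_regno_mode_ok below therefore allows integer and floating point
   values of up to 16 bytes in that register.  */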
1125
1126/* Value is 1 if register/mode pair is acceptable on arc.  */
1127
1128unsigned int arc_hard_regno_mode_ok[] = {
1129  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1130  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
1131  T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
1132  D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1133
1134  /* ??? Leave these as S_MODES for now.  */
1135  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1136  DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
1137  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1138  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
1139
1140  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1141  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1142  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1143  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1144
1145  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1146  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1147  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1148  V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
1149
1150  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
1151  S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
1152};
1153
1154unsigned int arc_mode_class [NUM_MACHINE_MODES];
1155
1156enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
1157
1158enum reg_class
1159arc_preferred_reload_class (rtx, enum reg_class cl)
1160{
1161  if ((cl) == CHEAP_CORE_REGS  || (cl) == WRITABLE_CORE_REGS)
1162    return GENERAL_REGS;
1163  return cl;
1164}
1165
1166/* Initialize the arc_mode_class array.  */
1167
1168static void
1169arc_init_reg_tables (void)
1170{
1171  int i;
1172
1173  for (i = 0; i < NUM_MACHINE_MODES; i++)
1174    {
1175      machine_mode m = (machine_mode) i;
1176
1177      switch (GET_MODE_CLASS (m))
1178	{
1179	case MODE_INT:
1180	case MODE_PARTIAL_INT:
1181	case MODE_COMPLEX_INT:
1182	  if (GET_MODE_SIZE (m) <= 4)
1183	    arc_mode_class[i] = 1 << (int) S_MODE;
1184	  else if (GET_MODE_SIZE (m) == 8)
1185	    arc_mode_class[i] = 1 << (int) D_MODE;
1186	  else if (GET_MODE_SIZE (m) == 16)
1187	    arc_mode_class[i] = 1 << (int) T_MODE;
1188	  else if (GET_MODE_SIZE (m) == 32)
1189	    arc_mode_class[i] = 1 << (int) O_MODE;
1190	  else
1191	    arc_mode_class[i] = 0;
1192	  break;
1193	case MODE_FLOAT:
1194	case MODE_COMPLEX_FLOAT:
1195	  if (GET_MODE_SIZE (m) <= 4)
1196	    arc_mode_class[i] = 1 << (int) SF_MODE;
1197	  else if (GET_MODE_SIZE (m) == 8)
1198	    arc_mode_class[i] = 1 << (int) DF_MODE;
1199	  else if (GET_MODE_SIZE (m) == 16)
1200	    arc_mode_class[i] = 1 << (int) TF_MODE;
1201	  else if (GET_MODE_SIZE (m) == 32)
1202	    arc_mode_class[i] = 1 << (int) OF_MODE;
1203	  else
1204	    arc_mode_class[i] = 0;
1205	  break;
1206	case MODE_VECTOR_INT:
1207	  arc_mode_class [i] = (1<< (int) V_MODE);
1208	  break;
1209	case MODE_CC:
1210	default:
1211	  /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
1212	     we must explicitly check for them here.  */
1213	  if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
1214	      || i == (int) CC_Cmode
1215	      || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode)
1216	    arc_mode_class[i] = 1 << (int) C_MODE;
1217	  else
1218	    arc_mode_class[i] = 0;
1219	  break;
1220	}
1221    }
1222}
1223
1224/* Core registers 56..59 are used for multiply extension options.
1225   The dsp option uses r56 and r57, these are then named acc1 and acc2.
1226   acc1 is the highpart, and acc2 the lowpart, so which register gets which
1227   number depends on endianness.
1228   The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
1229   Because mlo / mhi form a 64 bit value, we use different gcc internal
1230   register numbers to make them form a register pair as the gcc internals
1231   know it.  mmid gets number 57, if still available, and mlo / mhi get
1232   number 58 and 59, depending on endianness.  We use DBX_REGISTER_NUMBER
1233   to map this back.  */
1234  char rname56[5] = "r56";
1235  char rname57[5] = "r57";
1236  char rname58[5] = "r58";
1237  char rname59[5] = "r59";
1238
1239static void
1240arc_conditional_register_usage (void)
1241{
1242  int regno;
1243  int i;
1244  int fix_start = 60, fix_end = 55;
1245
1246  if (TARGET_MUL64_SET)
1247    {
1248      fix_start = 57;
1249      fix_end = 59;
1250
      /* We don't provide a name for mmid.  In rtl / assembly resource lists,
	 you are supposed to refer to it as mlo & mhi, e.g.
	 (zero_extract:SI (reg:DI 58) (const_int 32) (16)) .
	 In an actual asm instruction, you of course use mmid.
	 The point of avoiding having a separate register for mmid is that
	 this way, we don't have to carry clobbers of that reg around in every
	 instruction that modifies mlo and/or mhi.  */
1258      strcpy (rname57, "");
1259      strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
1260      strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
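      /* For illustration: on a little-endian target the strcpy calls above
	 leave r57 without a name and name r58 "mlo" and r59 "mhi"; on a
	 big-endian target the last two names are swapped.  */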
1261    }
1262  if (TARGET_MULMAC_32BY16_SET)
1263    {
1264      fix_start = 56;
1265      fix_end = fix_end > 57 ? fix_end : 57;
1266      strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
1267      strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
1268    }
1269  for (regno = fix_start; regno <= fix_end; regno++)
1270    {
1271      if (!fixed_regs[regno])
1272	warning (0, "multiply option implies r%d is fixed", regno);
1273      fixed_regs [regno] = call_used_regs[regno] = 1;
1274    }
1275  if (TARGET_Q_CLASS)
1276    {
1277      reg_alloc_order[2] = 12;
1278      reg_alloc_order[3] = 13;
1279      reg_alloc_order[4] = 14;
1280      reg_alloc_order[5] = 15;
1281      reg_alloc_order[6] = 1;
1282      reg_alloc_order[7] = 0;
1283      reg_alloc_order[8] = 4;
1284      reg_alloc_order[9] = 5;
1285      reg_alloc_order[10] = 6;
1286      reg_alloc_order[11] = 7;
1287      reg_alloc_order[12] = 8;
1288      reg_alloc_order[13] = 9;
1289      reg_alloc_order[14] = 10;
1290      reg_alloc_order[15] = 11;
1291    }
1292  if (TARGET_SIMD_SET)
1293    {
1294      int i;
1295      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1296	reg_alloc_order [i] = i;
1297      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1298	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1299	reg_alloc_order [i] = i;
1300    }
1301  /* For Arctangent-A5 / ARC600, lp_count may not be read in an instruction
1302     following immediately after another one setting it to a new value.
1303     There was some discussion on how to enforce scheduling constraints for
1304     processors with missing interlocks on the gcc mailing list:
1305     http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
1306     However, we can't actually use this approach, because for ARC the
1307     delay slot scheduling pass is active, which runs after
1308     machine_dependent_reorg.  */
1309  if (TARGET_ARC600)
1310    CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1311  else if (!TARGET_ARC700)
1312    fixed_regs[LP_COUNT] = 1;
1313  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1314    if (!call_used_regs[regno])
1315      CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
1316  for (regno = 32; regno < 60; regno++)
1317    if (!fixed_regs[regno])
1318      SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
1319  if (TARGET_ARC700)
1320    {
1321      for (regno = 32; regno <= 60; regno++)
1322	CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
1323
1324      /* If they have used -ffixed-lp_count, make sure it takes
1325	 effect.  */
1326      if (fixed_regs[LP_COUNT])
1327	{
1328	  CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT);
1329	  CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
1330	  CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT);
1331
1332	  /* Instead of taking out SF_MODE like below, forbid it outright.  */
1333	  arc_hard_regno_mode_ok[60] = 0;
1334	}
1335      else
1336	arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
1337    }
1338
1339  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1340    {
1341      if (i < 29)
1342	{
1343	  if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15))))
1344	    arc_regno_reg_class[i] = ARCOMPACT16_REGS;
1345	  else
1346	    arc_regno_reg_class[i] = GENERAL_REGS;
1347	}
1348      else if (i < 60)
1349	arc_regno_reg_class[i]
1350	  = (fixed_regs[i]
1351	     ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
1352		? CHEAP_CORE_REGS : ALL_CORE_REGS)
1353	     : ((TARGET_ARC700
1354		 && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i))
1355		? CHEAP_CORE_REGS : WRITABLE_CORE_REGS));
1356      else
1357	arc_regno_reg_class[i] = NO_REGS;
1358    }
1359
  /* ARCOMPACT16_REGS is empty if TARGET_Q_CLASS has not been activated.  */
1361  if (!TARGET_Q_CLASS)
1362    {
1363      CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]);
1364      CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]);
1365    }
1366
1367  gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
1368
1369  /* Handle Special Registers.  */
1370  arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register.  */
1371  arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register.  */
1372  arc_regno_reg_class[31] = LINK_REGS; /* blink register.  */
1373  arc_regno_reg_class[60] = LPCOUNT_REG;
1374  arc_regno_reg_class[61] = NO_REGS;      /* CC_REG: must be NO_REGS.  */
1375  arc_regno_reg_class[62] = GENERAL_REGS;
1376
1377  if (TARGET_DPFP)
1378    {
1379      for (i = 40; i < 44; ++i)
1380	{
1381	  arc_regno_reg_class[i] = DOUBLE_REGS;
1382
1383	  /* Unless they want us to do 'mov d1, 0x00000000' make sure
1384	     no attempt is made to use such a register as a destination
1385	     operand in *movdf_insn.  */
1386	  if (!TARGET_ARGONAUT_SET)
1387	    {
1388	    /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is
1389	       interpreted to mean they can use D1 or D2 in their insn.  */
1390	    CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS       ], i);
1391	    CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS         ], i);
1392	    CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS    ], i);
1393	    CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i);
1394	    }
1395	}
1396    }
1397  else
1398    {
1399      /* Disable all DOUBLE_REGISTER settings,
1400	 if not generating DPFP code.  */
1401      arc_regno_reg_class[40] = ALL_REGS;
1402      arc_regno_reg_class[41] = ALL_REGS;
1403      arc_regno_reg_class[42] = ALL_REGS;
1404      arc_regno_reg_class[43] = ALL_REGS;
1405
1406      arc_hard_regno_mode_ok[40] = 0;
1407      arc_hard_regno_mode_ok[42] = 0;
1408
1409      CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]);
1410    }
1411
1412  if (TARGET_SIMD_SET)
1413    {
1414      gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
1415      gcc_assert (ARC_LAST_SIMD_VR_REG  == 127);
1416
1417      for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
1418	arc_regno_reg_class [i] =  SIMD_VR_REGS;
1419
1420      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
1421      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
1422      gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
1423      gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG  == 143);
1424
1425      for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG;
1426	   i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
1427	arc_regno_reg_class [i] =  SIMD_DMA_CONFIG_REGS;
1428    }
1429
1430  /* pc : r63 */
1431  arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
1432}
1433
1434/* Handle an "interrupt" attribute; arguments as in
1435   struct attribute_spec.handler.  */
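/* For example (an illustrative use, not taken from this file):
     void isr (void) __attribute__ ((interrupt ("ilink1")));
   is accepted, while any other argument is warned about and the attribute
   is dropped.  */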
1436
1437static tree
1438arc_handle_interrupt_attribute (tree *, tree name, tree args, int,
1439				bool *no_add_attrs)
1440{
1441  gcc_assert (args);
1442
1443  tree value = TREE_VALUE (args);
1444
1445  if (TREE_CODE (value) != STRING_CST)
1446    {
1447      warning (OPT_Wattributes,
1448	       "argument of %qE attribute is not a string constant",
1449	       name);
1450      *no_add_attrs = true;
1451    }
1452  else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
1453	   && strcmp (TREE_STRING_POINTER (value), "ilink2"))
1454    {
1455      warning (OPT_Wattributes,
1456	       "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
1457	       name);
1458      *no_add_attrs = true;
1459    }
1460  return NULL_TREE;
1461}
1462
/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
1464   and two if they are nearly compatible (which causes a warning to be
1465   generated).  */
1466
1467static int
1468arc_comp_type_attributes (const_tree type1,
1469			  const_tree type2)
1470{
1471  int l1, l2, m1, m2, s1, s2;
1472
1473  /* Check for mismatch of non-default calling convention.  */
1474  if (TREE_CODE (type1) != FUNCTION_TYPE)
1475    return 1;
1476
1477  /* Check for mismatched call attributes.  */
1478  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
1479  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
1480  m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL;
1481  m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL;
1482  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
1483  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
1484
1485  /* Only bother to check if an attribute is defined.  */
1486  if (l1 | l2 | m1 | m2 | s1 | s2)
1487    {
1488      /* If one type has an attribute, the other must have the same attribute.  */
1489      if ((l1 != l2) || (m1 != m2) || (s1 != s2))
1490	return 0;
1491
1492      /* Disallow mixed attributes.  */
1493      if (l1 + m1 + s1 > 1)
1494	return 0;
1495    }
1496
1497
1498  return 1;
1499}
1500
1501/* Set the default attributes for TYPE.  */
1502
1503void
1504arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
1505{
1506  gcc_unreachable();
1507}
1508
1509/* Misc. utilities.  */
1510
/* COMPARISON is an rtx like `(CODE X Y)'.  Emit the compare insn and
   return the rtx for the cc reg in the proper mode.  */
1513
1514rtx
1515gen_compare_reg (rtx comparison, machine_mode omode)
1516{
1517  enum rtx_code code = GET_CODE (comparison);
1518  rtx x = XEXP (comparison, 0);
1519  rtx y = XEXP (comparison, 1);
1520  rtx tmp, cc_reg;
1521  machine_mode mode, cmode;
1522
1523
1524  cmode = GET_MODE (x);
1525  if (cmode == VOIDmode)
1526    cmode = GET_MODE (y);
1527  gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode);
1528  if (cmode == SImode)
1529    {
1530      if (!register_operand (x, SImode))
1531	{
1532	  if (register_operand (y, SImode))
1533	    {
1534	      tmp = x;
1535	      x = y;
1536	      y = tmp;
1537	      code = swap_condition (code);
1538	    }
1539	  else
1540	    x = copy_to_mode_reg (SImode, x);
1541	}
1542      if (GET_CODE (y) == SYMBOL_REF && flag_pic)
1543	y = copy_to_mode_reg (SImode, y);
1544    }
1545  else
1546    {
1547      x = force_reg (cmode, x);
1548      y = force_reg (cmode, y);
1549    }
1550  mode = SELECT_CC_MODE (code, x, y);
1551
1552  cc_reg = gen_rtx_REG (mode, CC_REG);
1553
1554  /* ??? FIXME (x-y)==0, as done by both cmpsfpx_raw and
1555     cmpdfpx_raw, is not a correct comparison for floats:
1556        http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
1557   */
1558  if (TARGET_ARGONAUT_SET
1559      && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP)))
1560    {
1561      switch (code)
1562	{
1563	case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
1564	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1565	  break;
1566	case GT: case UNLE: case GE: case UNLT:
1567	  code = swap_condition (code);
1568	  tmp = x;
1569	  x = y;
1570	  y = tmp;
1571	  break;
1572	default:
1573	  gcc_unreachable ();
1574	}
1575      if (cmode == SFmode)
1576      {
1577	emit_insn (gen_cmpsfpx_raw (x, y));
1578      }
1579      else /* DFmode */
1580      {
	/* The insns accept Dx regs directly.  */
1582	emit_insn (gen_cmpdfpx_raw (x, y));
1583      }
1584
1585      if (mode != CC_FPXmode)
1586	emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
1587				gen_rtx_COMPARE (mode,
1588						 gen_rtx_REG (CC_FPXmode, 61),
1589						 const0_rtx)));
1590    }
1591  else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
1592    {
1593      rtx op0 = gen_rtx_REG (cmode, 0);
1594      rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
1595
1596      switch (code)
1597	{
1598	case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
1599	case UNEQ: case LTGT: case ORDERED: case UNORDERED:
1600	  break;
1601	case LT: case UNGE: case LE: case UNGT:
1602	  code = swap_condition (code);
1603	  tmp = x;
1604	  x = y;
1605	  y = tmp;
1606	  break;
1607	default:
1608	  gcc_unreachable ();
1609	}
1610      if (currently_expanding_to_rtl)
1611	{
1612	  emit_move_insn (op0, x);
1613	  emit_move_insn (op1, y);
1614	}
1615      else
1616	{
1617	  gcc_assert (rtx_equal_p (op0, x));
1618	  gcc_assert (rtx_equal_p (op1, y));
1619	}
1620      emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
1621    }
1622  else
1623    emit_insn (gen_rtx_SET (omode, cc_reg,
1624			    gen_rtx_COMPARE (mode, x, y)));
1625  return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
1626}
1627
1628/* Return true if VALUE, a const_double, will fit in a limm (4 byte number).
1629   We assume the value can be either signed or unsigned.  */
1630
1631bool
1632arc_double_limm_p (rtx value)
1633{
1634  HOST_WIDE_INT low, high;
1635
1636  gcc_assert (GET_CODE (value) == CONST_DOUBLE);
1637
1638  if (TARGET_DPFP)
1639    return true;
1640
1641  low = CONST_DOUBLE_LOW (value);
1642  high = CONST_DOUBLE_HIGH (value);
1643
1644  if (low & 0x80000000)
1645    {
1646      return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
1647	      || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
1648		   == - (unsigned HOST_WIDE_INT) 0x80000000)
1649		  && high == -1));
1650    }
1651  else
1652    {
1653      return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
1654    }
1655}
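
/* Editorial examples (not in the original sources): with a 32-bit
   HOST_WIDE_INT, a DImode constant with high word 0 and low word
   0x7fffffff fits whether it is read as signed or unsigned; high word 0
   with low word 0x80000000 fits as an unsigned 32-bit limm; high word -1
   with low word 0x80000000 fits as a sign-extended negative limm.  A
   constant with high word 1 and low word 0 does not fit, since no 32-bit
   immediate can reproduce the high word.  */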
1656
1657/* Do any needed setup for a variadic function.  For the ARC, we must
1658   create a register parameter block, and then copy any anonymous arguments
1659   in registers to memory.
1660
1661   CUM has not been updated for the last named argument which has type TYPE
1662   and mode MODE, and we rely on this fact.  */
1663
1664static void
1665arc_setup_incoming_varargs (cumulative_args_t args_so_far,
1666			    machine_mode mode, tree type,
1667			    int *pretend_size, int no_rtl)
1668{
1669  int first_anon_arg;
1670  CUMULATIVE_ARGS next_cum;
1671
1672  /* We must treat `__builtin_va_alist' as an anonymous arg.  */
1673
1674  next_cum = *get_cumulative_args (args_so_far);
1675  arc_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
1676  first_anon_arg = next_cum;
1677
1678  if (first_anon_arg < MAX_ARC_PARM_REGS)
1679    {
1680      /* First anonymous (unnamed) argument is in a reg.  */
1681
1682      /* Note that first_reg_offset < MAX_ARC_PARM_REGS.  */
1683      int first_reg_offset = first_anon_arg;
1684
1685      if (!no_rtl)
1686	{
1687	  rtx regblock
1688	    = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx,
1689			   FIRST_PARM_OFFSET (0)));
1690	  move_block_from_reg (first_reg_offset, regblock,
1691			       MAX_ARC_PARM_REGS - first_reg_offset);
1692	}
1693
1694      *pretend_size
1695	= ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD);
1696    }
1697}
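
/* Editorial worked example (hypothetical signature): for
   "int f (int a, int b, ...)" the incoming CUM counts only "a";
   advancing it past "b" above yields first_anon_arg == 2.  Assuming
   MAX_ARC_PARM_REGS is 8 (r0-r7), the anonymous arguments that arrived
   in r2-r7 are dumped into the reg parm save area, and *pretend_size
   becomes (8 - 2) * UNITS_PER_WORD == 24 bytes.  */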
1698
1699/* Cost functions.  */
1700
1701/* Provide the costs of an addressing mode that contains ADDR.
1702   If ADDR is not a valid address, its cost is irrelevant.  */
1703
1704int
1705arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed)
1706{
1707  switch (GET_CODE (addr))
1708    {
1709    case REG :
1710      return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
1711    case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
1712    case PRE_MODIFY: case POST_MODIFY:
1713      return !speed;
1714
1715    case LABEL_REF :
1716    case SYMBOL_REF :
1717    case CONST :
1718      /* Most likely needs a LIMM.  */
1719      return COSTS_N_INSNS (1);
1720
1721    case PLUS :
1722      {
1723	register rtx plus0 = XEXP (addr, 0);
1724	register rtx plus1 = XEXP (addr, 1);
1725
1726	if (GET_CODE (plus0) != REG
1727	    && (GET_CODE (plus0) != MULT
1728		|| !CONST_INT_P (XEXP (plus0, 1))
1729		|| (INTVAL (XEXP (plus0, 1)) != 2
1730		    && INTVAL (XEXP (plus0, 1)) != 4)))
1731	  break;
1732
1733	switch (GET_CODE (plus1))
1734	  {
1735	  case CONST_INT :
1736	    return (!RTX_OK_FOR_OFFSET_P (SImode, plus1)
1737		    ? COSTS_N_INSNS (1)
1738		    : speed
1739		    ? 0
1740		    : (satisfies_constraint_Rcq (plus0)
1741		       && satisfies_constraint_O (plus1))
1742		    ? 0
1743		    : 1);
1744	  case REG:
1745	    return (speed < 1 ? 0
1746		    : (satisfies_constraint_Rcq (plus0)
1747		       && satisfies_constraint_Rcq (plus1))
1748		    ? 0 : 1);
1749	  case CONST :
1750	  case SYMBOL_REF :
1751	  case LABEL_REF :
1752	    return COSTS_N_INSNS (1);
1753	  default:
1754	    break;
1755	  }
1756	break;
1757      }
1758    default:
1759      break;
1760    }
1761
1762  return 4;
1763}
1764
1765/* Emit instruction X with the frame related bit set.  */
1766
1767static rtx
1768frame_insn (rtx x)
1769{
1770  x = emit_insn (x);
1771  RTX_FRAME_RELATED_P (x) = 1;
1772  return x;
1773}
1774
1775/* Emit a frame insn to move SRC to DST.  */
1776
1777static rtx
1778frame_move (rtx dst, rtx src)
1779{
1780  return frame_insn (gen_rtx_SET (VOIDmode, dst, src));
1781}
1782
1783/* Like frame_move, but add a REG_INC note for REG if ADDR contains an
1784   auto increment address, or is zero.  */
1785
1786static rtx
1787frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
1788{
1789  rtx insn = frame_move (dst, src);
1790
1791  if (!addr
1792      || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
1793      || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
1794    add_reg_note (insn, REG_INC, reg);
1795  return insn;
1796}
1797
1798/* Emit a frame insn which adjusts a frame address register REG by OFFSET.  */
1799
1800static rtx
1801frame_add (rtx reg, HOST_WIDE_INT offset)
1802{
1803  gcc_assert ((offset & 0x3) == 0);
1804  if (!offset)
1805    return NULL_RTX;
1806  return frame_move (reg, plus_constant (Pmode, reg, offset));
1807}
1808
1809/* Emit a frame insn which adjusts stack pointer by OFFSET.  */
1810
1811static rtx
1812frame_stack_add (HOST_WIDE_INT offset)
1813{
1814  return frame_add (stack_pointer_rtx, offset);
1815}
1816
1817/* Traditionally, we push saved registers first in the prologue,
1818   then we allocate the rest of the frame - and reverse in the epilogue.
   This still has its merits for ease of debugging, or for saving code size
   or even execution time if the stack frame is so large that some accesses
   could no longer be encoded with offsets in the instruction code under
   a different scheme.
1823   Also, it would be a good starting point if we got instructions to help
1824   with register save/restore.
1825
1826   However, often stack frames are small, and the pushing / popping has
1827   some costs:
1828   - the stack modification prevents a lot of scheduling.
1829   - frame allocation / deallocation needs extra instructions.
1830   - unless we know that we compile ARC700 user code, we need to put
1831     a memory barrier after frame allocation / before deallocation to
1832     prevent interrupts clobbering our data in the frame.
1833     In particular, we don't have any such guarantees for library functions,
     which, on the other hand, tend to have small frames.
1835
1836   Thus, for small frames, we'd like to use a different scheme:
1837   - The frame is allocated in full with the first prologue instruction,
1838     and deallocated in full with the last epilogue instruction.
     Thus, the instructions in-between can be freely scheduled.
1840   - If the function has no outgoing arguments on the stack, we can allocate
1841     one register save slot at the top of the stack.  This register can then
     be saved simultaneously with frame allocation, and restored with
1843     frame deallocation.
1844     This register can be picked depending on scheduling considerations,
     although some thought should go into having some set of registers
1846     to be potentially lingering after a call, and others to be available
     immediately - i.e. in the absence of interprocedural optimization, we
1848     can use an ABI-like convention for register allocation to reduce
1849     stalls after function return.  */
1850/* Function prologue/epilogue handlers.  */
1851
1852/* ARCompact stack frames look like:
1853
1854           Before call                     After call
1855  high  +-----------------------+       +-----------------------+
1856  mem   |  reg parm save area   |       | reg parm save area    |
1857        |  only created for     |       | only created for      |
1858        |  variable arg fns     |       | variable arg fns      |
1859    AP  +-----------------------+       +-----------------------+
1860        |  return addr register |       | return addr register  |
1861        |  (if required)        |       | (if required)         |
1862        +-----------------------+       +-----------------------+
1863        |                       |       |                       |
1864        |  reg save area        |       | reg save area         |
1865        |                       |       |                       |
1866        +-----------------------+       +-----------------------+
1867        |  frame pointer        |       | frame pointer         |
1868        |  (if required)        |       | (if required)         |
1869    FP  +-----------------------+       +-----------------------+
1870        |                       |       |                       |
1871        |  local/temp variables |       | local/temp variables  |
1872        |                       |       |                       |
1873        +-----------------------+       +-----------------------+
1874        |                       |       |                       |
1875        |  arguments on stack   |       | arguments on stack    |
1876        |                       |       |                       |
1877    SP  +-----------------------+       +-----------------------+
1878                                        | reg parm save area    |
1879                                        | only created for      |
1880                                        | variable arg fns      |
1881                                    AP  +-----------------------+
1882                                        | return addr register  |
1883                                        | (if required)         |
1884                                        +-----------------------+
1885                                        |                       |
1886                                        | reg save area         |
1887                                        |                       |
1888                                        +-----------------------+
1889                                        | frame pointer         |
1890                                        | (if required)         |
1891                                    FP  +-----------------------+
1892                                        |                       |
1893                                        | local/temp variables  |
1894                                        |                       |
1895                                        +-----------------------+
1896                                        |                       |
1897                                        | arguments on stack    |
1898  low                                   |                       |
1899  mem                               SP  +-----------------------+
1900
1901Notes:
1) The "reg parm save area" does not exist for non-variadic functions.
1903   The "reg parm save area" can be eliminated completely if we created our
1904   own va-arc.h, but that has tradeoffs as well (so it's not done).  */
1905
1906/* Structure to be filled in by arc_compute_frame_size with register
1907   save masks, and offsets for the current function.  */
1908struct GTY (()) arc_frame_info
1909{
1910  unsigned int total_size;	/* # bytes that the entire frame takes up.  */
1911  unsigned int extra_size;	/* # bytes of extra stuff.  */
1912  unsigned int pretend_size;	/* # bytes we push and pretend caller did.  */
1913  unsigned int args_size;	/* # bytes that outgoing arguments take up.  */
1914  unsigned int reg_size;	/* # bytes needed to store regs.  */
1915  unsigned int var_size;	/* # bytes that variables take up.  */
1916  unsigned int reg_offset;	/* Offset from new sp to store regs.  */
1917  unsigned int gmask;		/* Mask of saved gp registers.  */
1918  int          initialized;	/* Nonzero if frame size already calculated.  */
1919  short millicode_start_reg;
1920  short millicode_end_reg;
1921  bool save_return_addr;
1922};
1923
1924/* Defining data structures for per-function information.  */
1925
1926typedef struct GTY (()) machine_function
1927{
1928  enum arc_function_type fn_type;
1929  struct arc_frame_info frame_info;
1930  /* To keep track of unalignment caused by short insns.  */
1931  int unalign;
1932  int force_short_suffix; /* Used when disgorging return delay slot insns.  */
1933  const char *size_reason;
1934  struct arc_ccfsm ccfsm_current;
1935  /* Map from uid to ccfsm state during branch shortening.  */
1936  rtx ccfsm_current_insn;
1937  char arc_reorg_started;
1938  char prescan_initialized;
1939} machine_function;
1940
1941/* Type of function DECL.
1942
1943   The result is cached.  To reset the cache at the end of a function,
1944   call with DECL = NULL_TREE.  */
1945
1946enum arc_function_type
1947arc_compute_function_type (struct function *fun)
1948{
1949  tree decl = fun->decl;
1950  tree a;
1951  enum arc_function_type fn_type = fun->machine->fn_type;
1952
1953  if (fn_type != ARC_FUNCTION_UNKNOWN)
1954    return fn_type;
1955
1956  /* Assume we have a normal function (not an interrupt handler).  */
1957  fn_type = ARC_FUNCTION_NORMAL;
1958
1959  /* Now see if this is an interrupt handler.  */
1960  for (a = DECL_ATTRIBUTES (decl);
1961       a;
1962       a = TREE_CHAIN (a))
1963    {
1964      tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
1965
1966      if (name == get_identifier ("interrupt")
1967	  && list_length (args) == 1
1968	  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
1969	{
1970	  tree value = TREE_VALUE (args);
1971
1972	  if (!strcmp (TREE_STRING_POINTER (value), "ilink1"))
1973	    fn_type = ARC_FUNCTION_ILINK1;
1974	  else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
1975	    fn_type = ARC_FUNCTION_ILINK2;
1976	  else
1977	    gcc_unreachable ();
1978	  break;
1979	}
1980    }
1981
1982  return fun->machine->fn_type = fn_type;
1983}
1984
1985#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
1986#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
1987
1988/* Tell prologue and epilogue if register REGNO should be saved / restored.
1989   The return address and frame pointer are treated separately.
1990   Don't consider them here.
1991   Addition for pic: The gp register needs to be saved if the current
1992   function changes it to access gotoff variables.
1993   FIXME: This will not be needed if we used some arbitrary register
1994   instead of r26.
1995*/
1996#define MUST_SAVE_REGISTER(regno, interrupt_p) \
1997(((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
1998  && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
1999 || (flag_pic && crtl->uses_pic_offset_table \
2000     && regno == PIC_OFFSET_TABLE_REGNUM) )
2001
2002#define MUST_SAVE_RETURN_ADDR \
2003  (cfun->machine->frame_info.save_return_addr)
2004
2005/* Return non-zero if there are registers to be saved or loaded using
2006   millicode thunks.  We can only use consecutive sequences starting
2007   with r13, and not going beyond r25.
2008   GMASK is a bitmask of registers to save.  This function sets
   FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
2010   of registers to be saved / restored with a millicode call.  */
2011
2012static int
2013arc_compute_millicode_save_restore_regs (unsigned int gmask,
2014					 struct arc_frame_info *frame)
2015{
2016  int regno;
2017
2018  int start_reg = 13, end_reg = 25;
2019
2020  for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
2021    regno++;
2022  end_reg = regno - 1;
2023  /* There is no point in using millicode thunks if we don't save/restore
2024     at least three registers.  For non-leaf functions we also have the
2025     blink restore.  */
2026  if (regno - start_reg >= 3 - (crtl->is_leaf == 0))
2027    {
2028      frame->millicode_start_reg = 13;
2029      frame->millicode_end_reg = regno - 1;
2030      return 1;
2031    }
2032  return 0;
2033}
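
/* Editorial example: with GMASK covering r13-r17 the loop above stops at
   regno == 18, so five consecutive registers qualify and r13-r17 is
   recorded as the millicode range.  With only r13-r14 in GMASK, the range
   is still used in a non-leaf function (two register saves plus the blink
   restore reach the break-even point of three), but not in a leaf
   function.  */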
2034
2035/* Return the bytes needed to compute the frame pointer from the current
2036   stack pointer.
2037
2038   SIZE is the size needed for local variables.  */
2039
2040unsigned int
2041arc_compute_frame_size (int size)	/* size = # of var. bytes allocated.  */
2042{
2043  int regno;
2044  unsigned int total_size, var_size, args_size, pretend_size, extra_size;
2045  unsigned int reg_size, reg_offset;
2046  unsigned int gmask;
2047  enum arc_function_type fn_type;
2048  int interrupt_p;
2049  struct arc_frame_info *frame_info = &cfun->machine->frame_info;
2050
2051  size = ARC_STACK_ALIGN (size);
2052
2053  /* 1) Size of locals and temporaries */
2054  var_size	= size;
2055
2056  /* 2) Size of outgoing arguments */
2057  args_size	= crtl->outgoing_args_size;
2058
2059  /* 3) Calculate space needed for saved registers.
2060     ??? We ignore the extension registers for now.  */
2061
2062  /* See if this is an interrupt handler.  Call used registers must be saved
2063     for them too.  */
2064
2065  reg_size = 0;
2066  gmask = 0;
2067  fn_type = arc_compute_function_type (cfun);
2068  interrupt_p = ARC_INTERRUPT_P (fn_type);
2069
2070  for (regno = 0; regno <= 31; regno++)
2071    {
2072      if (MUST_SAVE_REGISTER (regno, interrupt_p))
2073	{
2074	  reg_size += UNITS_PER_WORD;
2075	  gmask |= 1 << regno;
2076	}
2077    }
2078
2079  /* 4) Space for back trace data structure.
2080	<return addr reg size> (if required) + <fp size> (if required).  */
2081  frame_info->save_return_addr
2082    = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
2083  /* Saving blink reg in case of leaf function for millicode thunk calls.  */
2084  if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
2085    {
2086      if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
2087	frame_info->save_return_addr = true;
2088    }
2089
2090  extra_size = 0;
2091  if (MUST_SAVE_RETURN_ADDR)
2092    extra_size = 4;
2093  if (frame_pointer_needed)
2094    extra_size += 4;
2095
2096  /* 5) Space for variable arguments passed in registers */
2097  pretend_size	= crtl->args.pretend_args_size;
2098
2099  /* Ensure everything before the locals is aligned appropriately.  */
2100    {
2101       unsigned int extra_plus_reg_size;
2102       unsigned int extra_plus_reg_size_aligned;
2103
2104       extra_plus_reg_size = extra_size + reg_size;
2105       extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
2106       reg_size = extra_plus_reg_size_aligned - extra_size;
2107    }
2108
2109  /* Compute total frame size.  */
2110  total_size = var_size + args_size + extra_size + pretend_size + reg_size;
2111
2112  total_size = ARC_STACK_ALIGN (total_size);
2113
2114  /* Compute offset of register save area from stack pointer:
2115     A5 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
2116  */
2117  reg_offset = (total_size - (pretend_size + reg_size + extra_size)
2118		+ (frame_pointer_needed ? 4 : 0));
2119
2120  /* Save computed information.  */
2121  frame_info->total_size   = total_size;
2122  frame_info->extra_size   = extra_size;
2123  frame_info->pretend_size = pretend_size;
2124  frame_info->var_size     = var_size;
2125  frame_info->args_size    = args_size;
2126  frame_info->reg_size     = reg_size;
2127  frame_info->reg_offset   = reg_offset;
2128  frame_info->gmask        = gmask;
2129  frame_info->initialized  = reload_completed;
2130
2131  /* Ok, we're done.  */
2132  return total_size;
2133}
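
/* Editorial worked example (hypothetical numbers, assuming 4-byte stack
   alignment and UNITS_PER_WORD == 4): a non-leaf, non-variadic function
   with 20 bytes of locals, 8 bytes of outgoing arguments, r13 and r14 to
   be saved, and no frame pointer needed gives
     var_size = 20, args_size = 8, reg_size = 8, extra_size = 4 (blink),
     pretend_size = 0,
     total_size = 20 + 8 + 4 + 0 + 8 = 40,
     reg_offset = 40 - (0 + 8 + 4) = 28,
   i.e. the register save area starts 28 bytes above the new stack
   pointer, just below the blink save slot at the top of the frame.  */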
2134
2135/* Common code to save/restore registers.  */
2136/* BASE_REG is the base register to use for addressing and to adjust.
2137   GMASK is a bitmask of general purpose registers to save/restore.
2138   epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk
2139   If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably
2140   using a pre-modify for the first memory access.  *FIRST_OFFSET is then
2141   zeroed.  */
2142
2143static void
2144arc_save_restore (rtx base_reg,
2145		  unsigned int gmask, int epilogue_p, int *first_offset)
2146{
2147  unsigned int offset = 0;
2148  int regno;
2149  struct arc_frame_info *frame = &cfun->machine->frame_info;
2150  rtx sibthunk_insn = NULL_RTX;
2151
2152  if (gmask)
2153    {
2154      /* Millicode thunks implementation:
2155	 Generates calls to millicodes for registers starting from r13 to r25
2156	 Present Limitations:
	 - Only one range supported.  The remaining regs will use ordinary
	   st and ld instructions for stores and loads.  Hence a gmask asking
	   to store r13-r14 and r16-r25 will only generate calls to store and
	   load r13 to r14, while store and load insns will be generated for
	   r16 to r25 in the prologue and epilogue respectively.

	 - Presently the library only supports register ranges starting from r13.
2164      */
2165      if (epilogue_p == 2 || frame->millicode_end_reg > 14)
2166	{
2167	  int start_call = frame->millicode_start_reg;
2168	  int end_call = frame->millicode_end_reg;
2169	  int n_regs = end_call - start_call + 1;
2170	  int i = 0, r, off = 0;
2171	  rtx insn;
2172	  rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2173
2174	  if (*first_offset)
2175	    {
	      /* "reg_size" won't be more than 127.  */
2177	      gcc_assert (epilogue_p || abs (*first_offset) <= 127);
2178	      frame_add (base_reg, *first_offset);
2179	      *first_offset = 0;
2180	    }
2181	  insn = gen_rtx_PARALLEL
2182		  (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
2183	  if (epilogue_p == 2)
2184	    i += 2;
2185	  else
2186	    XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
2187	  for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
2188	    {
2189	      rtx reg = gen_rtx_REG (SImode, r);
2190	      rtx mem
2191		= gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off));
2192
2193	      if (epilogue_p)
2194		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem);
2195	      else
2196		XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg);
2197	      gmask = gmask & ~(1L << r);
2198	    }
2199	  if (epilogue_p == 2)
2200	    sibthunk_insn = insn;
2201	  else
2202	    frame_insn (insn);
2203	  offset += off;
2204	}
2205
2206      for (regno = 0; regno <= 31; regno++)
2207	{
2208	  if ((gmask & (1L << regno)) != 0)
2209	    {
2210	      rtx reg = gen_rtx_REG (SImode, regno);
2211	      rtx addr, mem;
2212
2213	      if (*first_offset)
2214		{
2215		  gcc_assert (!offset);
2216		  addr = plus_constant (Pmode, base_reg, *first_offset);
2217		  addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
2218		  *first_offset = 0;
2219		}
2220	      else
2221		{
2222		  gcc_assert (SMALL_INT (offset));
2223		  addr = plus_constant (Pmode, base_reg, offset);
2224		}
2225	      mem = gen_frame_mem (SImode, addr);
2226	      if (epilogue_p)
2227		frame_move_inc (reg, mem, base_reg, addr);
2228	      else
2229		frame_move_inc (mem, reg, base_reg, addr);
2230	      offset += UNITS_PER_WORD;
2231	    } /* if */
2232	} /* for */
2233    }/* if */
2234  if (sibthunk_insn)
2235    {
2236      rtx r12 = gen_rtx_REG (Pmode, 12);
2237
2238      frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (offset)));
2239      XVECEXP (sibthunk_insn, 0, 0) = ret_rtx;
2240      XVECEXP (sibthunk_insn, 0, 1)
2241	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2242		       gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
2243      sibthunk_insn = emit_jump_insn (sibthunk_insn);
2244      RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
2245    }
2246} /* arc_save_restore */
2247
2248
2249int arc_return_address_regs[4]
2250  = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
2251
2252/* Set up the stack and frame pointer (if desired) for the function.  */
2253
2254void
2255arc_expand_prologue (void)
2256{
2257  int size = get_frame_size ();
2258  unsigned int gmask = cfun->machine->frame_info.gmask;
2259  /*  unsigned int frame_pointer_offset;*/
2260  unsigned int frame_size_to_allocate;
2261  /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
2262     Change the stack layout so that we rather store a high register with the
2263     PRE_MODIFY, thus enabling more short insn generation.)  */
2264  int first_offset = 0;
2265
2266  size = ARC_STACK_ALIGN (size);
2267
2268  /* Compute/get total frame size.  */
2269  size = (!cfun->machine->frame_info.initialized
2270	   ? arc_compute_frame_size (size)
2271	   : cfun->machine->frame_info.total_size);
2272
2273  if (flag_stack_usage_info)
2274    current_function_static_stack_size = size;
2275
2276  /* Keep track of frame size to be allocated.  */
2277  frame_size_to_allocate = size;
2278
2279  /* These cases shouldn't happen.  Catch them now.  */
2280  gcc_assert (!(size == 0 && gmask));
2281
2282  /* Allocate space for register arguments if this is a variadic function.  */
2283  if (cfun->machine->frame_info.pretend_size != 0)
2284    {
      /* Ensure pretend_size is at most 8 * word_size.  */
2286      gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
2287
2288      frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size);
2289      frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
2290    }
2291
2292  /* The home-grown ABI says link register is saved first.  */
2293  if (MUST_SAVE_RETURN_ADDR)
2294    {
2295      rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
2296      rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2297
2298      frame_move_inc (mem, ra, stack_pointer_rtx, 0);
2299      frame_size_to_allocate -= UNITS_PER_WORD;
2300
2301    } /* MUST_SAVE_RETURN_ADDR */
2302
2303  /* Save any needed call-saved regs (and call-used if this is an
2304     interrupt handler) for ARCompact ISA.  */
2305  if (cfun->machine->frame_info.reg_size)
2306    {
2307      first_offset = -cfun->machine->frame_info.reg_size;
2308      /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask.  */
2309      arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset);
2310      frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
2311    }
2312
2313
2314  /* Save frame pointer if needed.  */
2315  if (frame_pointer_needed)
2316    {
2317      rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2318			       GEN_INT (-UNITS_PER_WORD + first_offset));
2319      rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode,
2320							  stack_pointer_rtx,
2321							  addr));
2322      frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
2323      frame_size_to_allocate -= UNITS_PER_WORD;
2324      first_offset = 0;
2325      frame_move (frame_pointer_rtx, stack_pointer_rtx);
2326    }
2327
2328  /* ??? We don't handle the case where the saved regs are more than 252
2329     bytes away from sp.  This can be handled by decrementing sp once, saving
2330     the regs, and then decrementing it again.  The epilogue doesn't have this
2331     problem as the `ld' insn takes reg+limm values (though it would be more
2332     efficient to avoid reg+limm).  */
2333
2334  frame_size_to_allocate -= first_offset;
2335  /* Allocate the stack frame.  */
2336  if (frame_size_to_allocate > 0)
2337    frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate);
2338
2339  /* Setup the gp register, if needed.  */
2340  if (crtl->uses_pic_offset_table)
2341    arc_finalize_pic ();
2342}
2343
2344/* Do any necessary cleanup after a function to restore stack, frame,
2345   and regs.  */
2346
2347void
2348arc_expand_epilogue (int sibcall_p)
2349{
2350  int size = get_frame_size ();
2351  enum arc_function_type fn_type = arc_compute_function_type (cfun);
2352
2353  size = ARC_STACK_ALIGN (size);
2354  size = (!cfun->machine->frame_info.initialized
2355	   ? arc_compute_frame_size (size)
2356	   : cfun->machine->frame_info.total_size);
2357
2358  unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
2359  unsigned int frame_size;
2360  unsigned int size_to_deallocate;
2361  int restored;
2362  int can_trust_sp_p = !cfun->calls_alloca;
2363  int first_offset = 0;
2364  int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
2365
2366  size_to_deallocate = size;
2367
2368  frame_size = size - (pretend_size +
2369		       cfun->machine->frame_info.reg_size +
2370		       cfun->machine->frame_info.extra_size);
2371
2372  /* ??? There are lots of optimizations that can be done here.
2373     EG: Use fp to restore regs if it's closer.
2374     Maybe in time we'll do them all.  For now, always restore regs from
2375     sp, but don't restore sp if we don't have to.  */
2376
2377  if (!can_trust_sp_p)
2378    gcc_assert (frame_pointer_needed);
2379
2380  /* Restore stack pointer to the beginning of saved register area for
2381     ARCompact ISA.  */
2382  if (frame_size)
2383    {
2384      if (frame_pointer_needed)
2385	frame_move (stack_pointer_rtx, frame_pointer_rtx);
2386      else
2387	first_offset = frame_size;
2388      size_to_deallocate -= frame_size;
2389    }
2390  else if (!can_trust_sp_p)
2391    frame_stack_add (-frame_size);
2392
2393
2394  /* Restore any saved registers.  */
2395  if (frame_pointer_needed)
2396    {
2397	  rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
2398
2399	  frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
2400			  stack_pointer_rtx, 0);
2401	  size_to_deallocate -= UNITS_PER_WORD;
2402    }
2403
  /* Load blink after the millicode thunk calls when optimizing for size.  */
2405  if (millicode_p)
2406    {
2407	  int sibthunk_p = (!sibcall_p
2408			    && fn_type == ARC_FUNCTION_NORMAL
2409			    && !cfun->machine->frame_info.pretend_size);
2410
2411	  gcc_assert (!(cfun->machine->frame_info.gmask
2412			& (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
2413	  arc_save_restore (stack_pointer_rtx,
2414			    cfun->machine->frame_info.gmask,
2415			    1 + sibthunk_p, &first_offset);
2416	  if (sibthunk_p)
2417	    goto epilogue_done;
2418    }
2419  /* If we are to restore registers, and first_offset would require
2420     a limm to be encoded in a PRE_MODIFY, yet we can add it with a
2421     fast add to the stack pointer, do this now.  */
2422  if ((!SMALL_INT (first_offset)
2423       && cfun->machine->frame_info.gmask
2424       && ((TARGET_ARC700 && !optimize_size)
2425	    ? first_offset <= 0x800
2426	    : satisfies_constraint_C2a (GEN_INT (first_offset))))
2427       /* Also do this if we have both gprs and return
2428	  address to restore, and they both would need a LIMM.  */
2429       || (MUST_SAVE_RETURN_ADDR
2430	   && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2)
2431	   && cfun->machine->frame_info.gmask))
2432    {
2433      frame_stack_add (first_offset);
2434      first_offset = 0;
2435    }
2436  if (MUST_SAVE_RETURN_ADDR)
2437    {
2438      rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
2439      int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
2440      rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs);
2441
2442      /* If the load of blink would need a LIMM, but we can add
2443	 the offset quickly to sp, do the latter.  */
2444      if (!SMALL_INT (ra_offs >> 2)
2445	  && !cfun->machine->frame_info.gmask
2446	  && ((TARGET_ARC700 && !optimize_size)
2447	       ? ra_offs <= 0x800
2448	       : satisfies_constraint_C2a (GEN_INT (ra_offs))))
2449	{
2450	   size_to_deallocate -= ra_offs - first_offset;
2451	   first_offset = 0;
2452	   frame_stack_add (ra_offs);
2453	   ra_offs = 0;
2454	   addr = stack_pointer_rtx;
2455	}
2456      /* See if we can combine the load of the return address with the
2457	 final stack adjustment.
2458	 We need a separate load if there are still registers to
2459	 restore.  We also want a separate load if the combined insn
2460	 would need a limm, but a separate load doesn't.  */
2461      if (ra_offs
2462	  && !cfun->machine->frame_info.gmask
2463	  && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
2464	{
2465	  addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
2466	  first_offset = 0;
2467	  size_to_deallocate -= cfun->machine->frame_info.reg_size;
2468	}
2469      else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
2470	{
2471	  addr = gen_rtx_POST_INC (Pmode, addr);
2472	  size_to_deallocate = 0;
2473	}
2474      frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr);
2475    }
2476
2477  if (!millicode_p)
2478    {
2479       if (cfun->machine->frame_info.reg_size)
2480	 arc_save_restore (stack_pointer_rtx,
2481	   /* The zeroing of these two bits is unnecessary, but leave this in for clarity.  */
2482			   cfun->machine->frame_info.gmask
2483			   & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
2484    }
2485
2486
2487  /* The rest of this function does the following:
2488     ARCompact    : handle epilogue_delay, restore sp (phase-2), return
2489  */
2490
2491  /* Keep track of how much of the stack pointer we've restored.
2492     It makes the following a lot more readable.  */
2493  size_to_deallocate += first_offset;
2494  restored = size - size_to_deallocate;
2495
2496  if (size > restored)
2497    frame_stack_add (size - restored);
2498  /* Emit the return instruction.  */
2499  if (sibcall_p == FALSE)
2500    emit_jump_insn (gen_simple_return ());
2501 epilogue_done:
2502  if (!TARGET_EPILOGUE_CFI)
2503    {
2504      rtx_insn *insn;
2505
2506      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2507	RTX_FRAME_RELATED_P (insn) = 0;
2508    }
2509}
2510
2511/* Return the offset relative to the stack pointer where the return address
2512   is stored, or -1 if it is not stored.  */
2513
2514int
2515arc_return_slot_offset ()
2516{
2517  struct arc_frame_info *afi = &cfun->machine->frame_info;
2518
2519  return (afi->save_return_addr
2520	  ? afi->total_size - afi->pretend_size - afi->extra_size : -1);
2521}
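
/* Editorial example: for the hypothetical frame worked through after
   arc_compute_frame_size (total_size 40, pretend_size 0, extra_size 4),
   the return address slot is at sp + 36, which matches the blink save
   emitted with PRE_DEC at the start of the prologue.  */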
2522
2523/* PIC */
2524
2525/* Emit special PIC prologues and epilogues.  */
2526/* If the function has any GOTOFF relocations, then the GOTBASE
   register has to be set up in the prologue.
2528   The instruction needed at the function start for setting up the
2529   GOTBASE register is
2530      add rdest, pc,
2531   ----------------------------------------------------------
2532   The rtl to be emitted for this should be:
2533     set (reg basereg)
2534         (plus (reg pc)
2535               (const (unspec (symref _DYNAMIC) 3)))
2536   ----------------------------------------------------------  */
2537
2538static void
2539arc_finalize_pic (void)
2540{
2541  rtx pat;
2542  rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
2543
2544  if (crtl->uses_pic_offset_table == 0)
2545    return;
2546
2547  gcc_assert (flag_pic != 0);
2548
2549  pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
2550  pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT);
2551  pat = gen_rtx_CONST (Pmode, pat);
2552
2553  pat = gen_rtx_SET (VOIDmode, baseptr_rtx, pat);
2554
2555  emit_insn (pat);
2556}
2557
2558/* !TARGET_BARREL_SHIFTER support.  */
2559/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what
2560   kind of shift.  */
2561
2562void
2563emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2)
2564{
2565  rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2);
2566  rtx pat
2567    = ((shift4_operator (shift, SImode) ?  gen_shift_si3 : gen_shift_si3_loop)
2568	(op0, op1, op2, shift));
2569  emit_insn (pat);
2570}
2571
2572/* Output the assembler code for doing a shift.
2573   We go to a bit of trouble to generate efficient code as the ARC601 only has
2574   single bit shifts.  This is taken from the h8300 port.  We only have one
2575   mode of shifting and can't access individual bytes like the h8300 can, so
2576   this is greatly simplified (at the expense of not generating hyper-
2577   efficient code).
2578
2579   This function is not used if the variable shift insns are present.  */
2580
2581/* FIXME:  This probably can be done using a define_split in arc.md.
2582   Alternately, generate rtx rather than output instructions.  */
   Alternatively, generate rtx rather than output instructions.  */
2584const char *
2585output_shift (rtx *operands)
2586{
2587  /*  static int loopend_lab;*/
2588  rtx shift = operands[3];
2589  machine_mode mode = GET_MODE (shift);
2590  enum rtx_code code = GET_CODE (shift);
2591  const char *shift_one;
2592
2593  gcc_assert (mode == SImode);
2594
2595  switch (code)
2596    {
2597    case ASHIFT:   shift_one = "add %0,%1,%1"; break;
2598    case ASHIFTRT: shift_one = "asr %0,%1"; break;
2599    case LSHIFTRT: shift_one = "lsr %0,%1"; break;
2600    default:       gcc_unreachable ();
2601    }
2602
2603  if (GET_CODE (operands[2]) != CONST_INT)
2604    {
2605      output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
2606      goto shiftloop;
2607    }
2608  else
2609    {
2610      int n;
2611
2612      n = INTVAL (operands[2]);
2613
2614      /* Only consider the lower 5 bits of the shift count.  */
2615      n = n & 0x1f;
2616
2617      /* First see if we can do them inline.  */
2618      /* ??? We could get better scheduling & shorter code (using short insns)
2619	 by using splitters.  Alas, that'd be even more verbose.  */
2620      if (code == ASHIFT && n <= 9 && n > 2
2621	  && dest_reg_operand (operands[4], SImode))
2622	{
2623	  output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
2624	  for (n -=3 ; n >= 3; n -= 3)
2625	    output_asm_insn ("add3 %0,%4,%0", operands);
2626	  if (n == 2)
2627	    output_asm_insn ("add2 %0,%4,%0", operands);
2628	  else if (n)
2629	    output_asm_insn ("add %0,%0,%0", operands);
2630	}
2631      else if (n <= 4)
2632	{
2633	  while (--n >= 0)
2634	    {
2635	      output_asm_insn (shift_one, operands);
2636	      operands[1] = operands[0];
2637	    }
2638	}
2639      /* See if we can use a rotate/and.  */
2640      else if (n == BITS_PER_WORD - 1)
2641	{
2642	  switch (code)
2643	    {
2644	    case ASHIFT :
2645	      output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
2646	      break;
2647	    case ASHIFTRT :
2648	      /* The ARC doesn't have a rol insn.  Use something else.  */
2649	      output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
2650	      break;
2651	    case LSHIFTRT :
2652	      /* The ARC doesn't have a rol insn.  Use something else.  */
2653	      output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
2654	      break;
2655	    default:
2656	      break;
2657	    }
2658	}
2659      else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
2660	{
2661	  switch (code)
2662	    {
2663	    case ASHIFT :
2664	      output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
2665	      break;
2666	    case ASHIFTRT :
2667#if 1 /* Need some scheduling comparisons.  */
2668	      output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
2669			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2670#else
2671	      output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
2672			       "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
2673#endif
2674	      break;
2675	    case LSHIFTRT :
2676#if 1
2677	      output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
2678			       "add.f 0,%4,%4\n\trlc %0,%0", operands);
2679#else
2680	      output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
2681			       "and %0,%0,1\n\trlc %0,%0", operands);
2682#endif
2683	      break;
2684	    default:
2685	      break;
2686	    }
2687	}
2688      else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
2689	output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
2690			 operands);
2691      /* Must loop.  */
2692      else
2693	{
2694	  operands[2] = GEN_INT (n);
2695	  output_asm_insn ("mov.f lp_count, %2", operands);
2696
2697	shiftloop:
2698	    {
2699	      output_asm_insn ("lpnz\t2f", operands);
2700	      output_asm_insn (shift_one, operands);
2701	      output_asm_insn ("nop", operands);
2702	      fprintf (asm_out_file, "2:\t%s end single insn loop\n",
2703		       ASM_COMMENT_START);
2704	    }
2705	}
2706    }
2707
2708  return "";
2709}
2710
2711/* Nested function support.  */
2712
2713/* Directly store VALUE into memory object BLOCK at OFFSET.  */
2714
2715static void
2716emit_store_direct (rtx block, int offset, int value)
2717{
2718  emit_insn (gen_store_direct (adjust_address (block, SImode, offset),
2719			       force_reg (SImode,
2720					  gen_int_mode (value, SImode))));
2721}
2722
2723/* Emit RTL insns to initialize the variable parts of a trampoline.
2724   FNADDR is an RTX for the address of the function's pure code.
2725   CXT is an RTX for the static chain value for the function.  */
/* With potentially multiple shared objects loaded, and multiple stacks
   present for multiple threads where trampolines might reside, a simple
   range check will likely not suffice for the profiler to tell if a callee
   is a trampoline.  We get a speedier check by making the trampoline start
   at an address that is not 4-byte aligned.
2731   A trampoline looks like this:
2732
2733   nop_s	     0x78e0
2734entry:
2735   ld_s r12,[pcl,12] 0xd403
2736   ld   r11,[pcl,12] 0x170c 700b
2737   j_s [r12]         0x7c00
2738   nop_s	     0x78e0
2739
2740   The fastest trampoline to execute for trampolines within +-8KB of CTX
2741   would be:
2742   add2 r11,pcl,s12
2743   j [limm]           0x20200f80 limm
2744   and that would also be faster to write to the stack by computing the offset
2745   from CTX to TRAMP at compile time.  However, it would really be better to
2746   get rid of the high cost of cache invalidation when generating trampolines,
2747   which requires that the code part of trampolines stays constant, and
2748   additionally either
2749   - making sure that no executable code but trampolines is on the stack,
     no icache entries linger for the area of the stack from before the
2751     stack was allocated, and allocating trampolines in trampoline-only
2752     cache lines
2753  or
   - allocate trampolines from a special pool of pre-allocated trampolines.  */
2755
2756static void
2757arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
2758{
2759  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
2760
2761  emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
2762  emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
2763  emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
2764  emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
2765  emit_move_insn (adjust_address (tramp, SImode, 16), cxt);
2766  emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0)));
2767}
2768
2769/* Allow the profiler to easily distinguish trampolines from normal
2770  functions.  */
2771
2772static rtx
2773arc_trampoline_adjust_address (rtx addr)
2774{
2775  return plus_constant (Pmode, addr, 2);
2776}
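
/* Editorial note: assuming TRAMP is at least word-aligned, returning
   TRAMP + 2 skips the leading two-byte nop_s, so the advertised entry
   point is 2 mod 4.  This is the misaligned address that lets the
   profiler recognize trampolines, as described in the layout comment
   above arc_initialize_trampoline.  */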
2777
/* This is set briefly to 1 when we output a ".as" address modifier, and then
2779   reset when we output the scaled address.  */
2780static int output_scaled = 0;
2781
2782/* Print operand X (an rtx) in assembler syntax to file FILE.
2783   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
2784   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
2785/* In final.c:output_asm_insn:
2786    'l' : label
2787    'a' : address
2788    'c' : constant address if CONSTANT_ADDRESS_P
2789    'n' : negative
2790   Here:
2791    'Z': log2(x+1)-1
2792    'z': log2
2793    'M': log2(~x)
2794    '#': condbranch delay slot suffix
2795    '*': jump delay slot suffix
2796    '?' : nonjump-insn suffix for conditional execution or short instruction
2797    '!' : jump / call suffix for conditional execution or short instruction
    '`': fold constant inside unary operator, re-recognize, and emit.
2799    'd'
2800    'D'
2801    'R': Second word
2802    'S'
2803    'B': Branch comparison operand - suppress sda reference
2804    'H': Most significant word
2805    'L': Least significant word
2806    'A': ASCII decimal representation of floating point value
2807    'U': Load/store update or scaling indicator
2808    'V': cache bypass indicator for volatile
2809    'P'
2810    'F'
2811    '^'
2812    'O': Operator
2813    'o': original symbol - no @ prepending.  */
2814
2815void
2816arc_print_operand (FILE *file, rtx x, int code)
2817{
2818  switch (code)
2819    {
2820    case 'Z':
2821      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (INTVAL (x) + 1) - 1);
2823      else
2824	output_operand_lossage ("invalid operand to %%Z code");
2825
2826      return;
2827
2828    case 'z':
2829      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (INTVAL (x)));
2831      else
2832	output_operand_lossage ("invalid operand to %%z code");
2833
2834      return;
2835
2836    case 'M':
2837      if (GET_CODE (x) == CONST_INT)
	fprintf (file, "%d", exact_log2 (~INTVAL (x)));
2839      else
2840	output_operand_lossage ("invalid operand to %%M code");
2841
2842      return;
2843
2844    case '#' :
2845      /* Conditional branches depending on condition codes.
2846	 Note that this is only for branches that were known to depend on
2847	 condition codes before delay slot scheduling;
2848	 out-of-range brcc / bbit expansions should use '*'.
2849	 This distinction is important because of the different
2850	 allowable delay slot insns and the output of the delay suffix
2851	 for TARGET_AT_DBR_COND_EXEC.  */
2852    case '*' :
2853      /* Unconditional branches / branches not depending on condition codes.
2854	 This could also be a CALL_INSN.
2855	 Output the appropriate delay slot suffix.  */
2856      if (final_sequence && final_sequence->len () != 1)
2857	{
2858	  rtx_insn *jump = final_sequence->insn (0);
2859	  rtx_insn *delay = final_sequence->insn (1);
2860
2861	  /* For TARGET_PAD_RETURN we might have grabbed the delay insn.  */
2862	  if (delay->deleted ())
2863	    return;
2864	  if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
2865	    fputs (INSN_FROM_TARGET_P (delay) ? ".d"
2866		   : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d"
2867		   : get_attr_type (jump) == TYPE_RETURN && code == '#' ? ""
2868		   : ".nd",
2869		   file);
2870	  else
2871	    fputs (".d", file);
2872	}
2873      return;
2874    case '?' : /* with leading "." */
2875    case '!' : /* without leading "." */
2876      /* This insn can be conditionally executed.  See if the ccfsm machinery
2877	 says it should be conditionalized.
2878	 If it shouldn't, we'll check the compact attribute if this insn
2879	 has a short variant, which may be used depending on code size and
2880	 alignment considerations.  */
2881      if (current_insn_predicate)
2882	arc_ccfsm_current.cc
2883	  = get_arc_condition_code (current_insn_predicate);
2884      if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
2885	{
2886	  /* Is this insn in a delay slot sequence?  */
2887	  if (!final_sequence || XVECLEN (final_sequence, 0) < 2
2888	      || current_insn_predicate
2889	      || CALL_P (final_sequence->insn (0))
2890	      || simplejump_p (final_sequence->insn (0)))
2891	    {
2892	      /* This insn isn't in a delay slot sequence, or conditionalized
2893		 independently of its position in a delay slot.  */
2894	      fprintf (file, "%s%s",
2895		       code == '?' ? "." : "",
2896		       arc_condition_codes[arc_ccfsm_current.cc]);
2897	      /* If this is a jump, there are still short variants.  However,
2898		 only beq_s / bne_s have the same offset range as b_s,
2899		 and the only short conditional returns are jeq_s and jne_s.  */
2900	      if (code == '!'
2901		  && (arc_ccfsm_current.cc == ARC_CC_EQ
2902		      || arc_ccfsm_current.cc == ARC_CC_NE
2903		      || 0 /* FIXME: check if branch in 7 bit range.  */))
2904		output_short_suffix (file);
2905	    }
2906	  else if (code == '!') /* Jump with delay slot.  */
2907	    fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
2908	  else /* An Instruction in a delay slot of a jump or call.  */
2909	    {
2910	      rtx jump = XVECEXP (final_sequence, 0, 0);
2911	      rtx insn = XVECEXP (final_sequence, 0, 1);
2912
2913	      /* If the insn is annulled and is from the target path, we need
		 to invert the condition test.  */
2915	      if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump))
2916		{
2917		  if (INSN_FROM_TARGET_P (insn))
2918		    fprintf (file, "%s%s",
2919			     code == '?' ? "." : "",
2920			     arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
2921		  else
2922		    fprintf (file, "%s%s",
2923			     code == '?' ? "." : "",
2924			     arc_condition_codes[arc_ccfsm_current.cc]);
2925		  if (arc_ccfsm_current.state == 5)
2926		    arc_ccfsm_current.state = 0;
2927		}
2928	      else
2929		/* This insn is executed for either path, so don't
2930		   conditionalize it at all.  */
2931		output_short_suffix (file);
2932
2933	    }
2934	}
2935      else
2936	output_short_suffix (file);
2937      return;
    case '`':
2939      /* FIXME: fold constant inside unary operator, re-recognize, and emit.  */
2940      gcc_unreachable ();
2941    case 'd' :
2942      fputs (arc_condition_codes[get_arc_condition_code (x)], file);
2943      return;
2944    case 'D' :
2945      fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
2946				 (get_arc_condition_code (x))],
2947	     file);
2948      return;
2949    case 'R' :
2950      /* Write second word of DImode or DFmode reference,
2951	 register or memory.  */
2952      if (GET_CODE (x) == REG)
2953	fputs (reg_names[REGNO (x)+1], file);
2954      else if (GET_CODE (x) == MEM)
2955	{
2956	  fputc ('[', file);
2957
2958	  /* Handle possible auto-increment.  For PRE_INC / PRE_DEC /
2959	    PRE_MODIFY, we will have handled the first word already;
2960	    For POST_INC / POST_DEC / POST_MODIFY, the access to the
2961	    first word will be done later.  In either case, the access
2962	    to the first word will do the modify, and we only have
2963	    to add an offset of four here.  */
2964	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
2965	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
2966	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
2967	      || GET_CODE (XEXP (x, 0)) == POST_INC
2968	      || GET_CODE (XEXP (x, 0)) == POST_DEC
2969	      || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
2970	    output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4));
2971	  else if (output_scaled)
2972	    {
2973	      rtx addr = XEXP (x, 0);
2974	      int size = GET_MODE_SIZE (GET_MODE (x));
2975
2976	      output_address (plus_constant (Pmode, XEXP (addr, 0),
2977					     ((INTVAL (XEXP (addr, 1)) + 4)
2978					      >> (size == 2 ? 1 : 2))));
2979	      output_scaled = 0;
2980	    }
2981	  else
2982	    output_address (plus_constant (Pmode, XEXP (x, 0), 4));
2983	  fputc (']', file);
2984	}
2985      else
2986	output_operand_lossage ("invalid operand to %%R code");
2987      return;
2988    case 'S' :
2989	/* FIXME: remove %S option.  */
2990	break;
2991    case 'B' /* Branch or other LIMM ref - must not use sda references.  */ :
2992      if (CONSTANT_P (x))
2993	{
2994	  output_addr_const (file, x);
2995	  return;
2996	}
2997      break;
2998    case 'H' :
2999    case 'L' :
3000      if (GET_CODE (x) == REG)
3001	{
3002	  /* L = least significant word, H = most significant word.  */
3003	  if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
3004	    fputs (reg_names[REGNO (x)], file);
3005	  else
3006	    fputs (reg_names[REGNO (x)+1], file);
3007	}
3008      else if (GET_CODE (x) == CONST_INT
3009	       || GET_CODE (x) == CONST_DOUBLE)
3010	{
3011	  rtx first, second;
3012
3013	  split_double (x, &first, &second);
3014
	  if ((WORDS_BIG_ENDIAN) == 0)
	    fprintf (file, "0x%08" PRIx64,
		     code == 'L' ? INTVAL (first) : INTVAL (second));
	  else
	    fprintf (file, "0x%08" PRIx64,
		     code == 'L' ? INTVAL (second) : INTVAL (first));
	}
3024      else
3025	output_operand_lossage ("invalid operand to %%H/%%L code");
3026      return;
3027    case 'A' :
3028      {
3029	char str[30];
3030
3031	gcc_assert (GET_CODE (x) == CONST_DOUBLE
3032		    && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
3033
3034	real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
3035	fprintf (file, "%s", str);
3036	return;
3037      }
3038    case 'U' :
3039      /* Output a load/store with update indicator if appropriate.  */
3040      if (GET_CODE (x) == MEM)
3041	{
3042	  rtx addr = XEXP (x, 0);
3043	  switch (GET_CODE (addr))
3044	    {
3045	    case PRE_INC: case PRE_DEC: case PRE_MODIFY:
3046	      fputs (".a", file); break;
3047	    case POST_INC: case POST_DEC: case POST_MODIFY:
3048	      fputs (".ab", file); break;
3049	    case PLUS:
3050	      /* Are we using a scaled index?  */
3051	      if (GET_CODE (XEXP (addr, 0)) == MULT)
3052		fputs (".as", file);
3053	      /* Can we use a scaled offset?  */
3054	      else if (CONST_INT_P (XEXP (addr, 1))
3055		       && GET_MODE_SIZE (GET_MODE (x)) > 1
3056		       && (!(INTVAL (XEXP (addr, 1))
3057			     & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
3058		       /* Does it make a difference?  */
3059		       && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)),
3060					   GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
3061		{
3062		  fputs (".as", file);
3063		  output_scaled = 1;
3064		}
3065	      break;
3066	    case REG:
3067	      break;
3068	    default:
3069	      gcc_assert (CONSTANT_P (addr)); break;
3070	    }
3071	}
3072      else
3073	output_operand_lossage ("invalid operand to %%U code");
3074      return;
3075    case 'V' :
3076      /* Output cache bypass indicator for a load/store insn.  Volatile memory
3077	 refs are defined to use the cache bypass mechanism.  */
3078      if (GET_CODE (x) == MEM)
3079	{
3080	  if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET )
3081	    fputs (".di", file);
3082	}
3083      else
3084	output_operand_lossage ("invalid operand to %%V code");
3085      return;
3086      /* plt code.  */
3087    case 'P':
3088    case 0 :
3089      /* Do nothing special.  */
3090      break;
3091    case 'F':
3092      fputs (reg_names[REGNO (x)]+1, file);
3093      return;
3094    case '^':
3095	/* This punctuation character is needed because label references are
3096	printed in the output template using %l. This is a front end
3097	character, and when we want to emit a '@' before it, we have to use
3098	this '^'.  */
3099
	fputc ('@', file);
3101	return;
3102    case 'O':
3103      /* Output an operator.  */
3104      switch (GET_CODE (x))
3105	{
3106	case PLUS:	fputs ("add", file); return;
3107	case SS_PLUS:	fputs ("adds", file); return;
3108	case AND:	fputs ("and", file); return;
3109	case IOR:	fputs ("or", file); return;
3110	case XOR:	fputs ("xor", file); return;
3111	case MINUS:	fputs ("sub", file); return;
3112	case SS_MINUS:	fputs ("subs", file); return;
3113	case ASHIFT:	fputs ("asl", file); return;
3114	case ASHIFTRT:	fputs ("asr", file); return;
3115	case LSHIFTRT:	fputs ("lsr", file); return;
3116	case ROTATERT:	fputs ("ror", file); return;
3117	case MULT:	fputs ("mpy", file); return;
3118	case ABS:	fputs ("abs", file); return; /* Unconditional.  */
3119	case NEG:	fputs ("neg", file); return;
3120	case SS_NEG:	fputs ("negs", file); return;
3121	case NOT:	fputs ("not", file); return; /* Unconditional.  */
3122	case ZERO_EXTEND:
3123	  fputs ("ext", file); /* bmsk allows predication.  */
3124	  goto size_suffix;
3125	case SIGN_EXTEND: /* Unconditional.  */
3126	  fputs ("sex", file);
3127	size_suffix:
3128	  switch (GET_MODE (XEXP (x, 0)))
3129	    {
3130	    case QImode: fputs ("b", file); return;
3131	    case HImode: fputs ("w", file); return;
3132	    default: break;
3133	    }
3134	  break;
3135	case SS_TRUNCATE:
3136	  if (GET_MODE (x) != HImode)
3137	    break;
3138	  fputs ("sat16", file);
3139	default: break;
3140	}
3141      output_operand_lossage ("invalid operand to %%O code"); return;
3142    case 'o':
3143      if (GET_CODE (x) == SYMBOL_REF)
3144	{
3145	  assemble_name (file, XSTR (x, 0));
3146	  return;
3147	}
3148      break;
3149    case '&':
3150      if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
3151	fprintf (file, "; unalign: %d", cfun->machine->unalign);
3152      return;
3153    default :
3154      /* Unknown flag.  */
3155      output_operand_lossage ("invalid operand output code");
3156    }
3157
3158  switch (GET_CODE (x))
3159    {
3160    case REG :
3161      fputs (reg_names[REGNO (x)], file);
3162      break;
3163    case MEM :
3164      {
3165	rtx addr = XEXP (x, 0);
3166	int size = GET_MODE_SIZE (GET_MODE (x));
3167
3168	fputc ('[', file);
3169
3170	switch (GET_CODE (addr))
3171	  {
3172	  case PRE_INC: case POST_INC:
3173	    output_address (plus_constant (Pmode, XEXP (addr, 0), size)); break;
3174	  case PRE_DEC: case POST_DEC:
3175	    output_address (plus_constant (Pmode, XEXP (addr, 0), -size));
3176	    break;
3177	  case PRE_MODIFY: case POST_MODIFY:
3178	    output_address (XEXP (addr, 1)); break;
3179	  case PLUS:
3180	    if (output_scaled)
3181	      {
3182		output_address (plus_constant (Pmode, XEXP (addr, 0),
3183					       (INTVAL (XEXP (addr, 1))
3184						>> (size == 2 ? 1 : 2))));
3185		output_scaled = 0;
3186	      }
3187	    else
3188	      output_address (addr);
3189	    break;
3190	  default:
3191	    if (flag_pic && CONSTANT_ADDRESS_P (addr))
3192	      arc_output_pic_addr_const (file, addr, code);
3193	    else
3194	      output_address (addr);
3195	    break;
3196	  }
3197	fputc (']', file);
3198	break;
3199      }
3200    case CONST_DOUBLE :
3201      /* We handle SFmode constants here as output_addr_const doesn't.  */
3202      if (GET_MODE (x) == SFmode)
3203	{
3204	  REAL_VALUE_TYPE d;
3205	  long l;
3206
3207	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3208	  REAL_VALUE_TO_TARGET_SINGLE (d, l);
3209	  fprintf (file, "0x%08lx", l);
3210	  break;
3211	}
3212      /* Fall through.  Let output_addr_const deal with it.  */
3213    default :
3214      if (flag_pic)
3215	arc_output_pic_addr_const (file, x, code);
3216      else
3217	{
3218	  /* FIXME: Dirty way to handle @var@sda+const.  Should be handled
3219	     with asm_output_symbol_ref.  */
3220	  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3221	    {
3222	      x = XEXP (x, 0);
3223	      output_addr_const (file, XEXP (x, 0));
3224	      if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
3225		fprintf (file, "@sda");
3226
3227	      if (GET_CODE (XEXP (x, 1)) != CONST_INT
3228		  || INTVAL (XEXP (x, 1)) >= 0)
3229		fprintf (file, "+");
3230	      output_addr_const (file, XEXP (x, 1));
3231	    }
3232	  else
3233	    output_addr_const (file, x);
3234	}
3235      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
3236	fprintf (file, "@sda");
3237      break;
3238    }
3239}
3240
3241/* Print a memory address as an operand to reference that memory location.  */
3242
3243void
3244arc_print_operand_address (FILE *file , rtx addr)
3245{
3246  register rtx base, index = 0;
3247
3248  switch (GET_CODE (addr))
3249    {
3250    case REG :
3251      fputs (reg_names[REGNO (addr)], file);
3252      break;
3253    case SYMBOL_REF :
3254      output_addr_const (file, addr);
3255      if (SYMBOL_REF_SMALL_P (addr))
3256	fprintf (file, "@sda");
3257      break;
3258    case PLUS :
3259      if (GET_CODE (XEXP (addr, 0)) == MULT)
3260	index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
3261      else if (CONST_INT_P (XEXP (addr, 0)))
3262	index = XEXP (addr, 0), base = XEXP (addr, 1);
3263      else
3264	base = XEXP (addr, 0), index = XEXP (addr, 1);
3265
3266      gcc_assert (OBJECT_P (base));
3267      arc_print_operand_address (file, base);
3268      if (CONSTANT_P (base) && CONST_INT_P (index))
3269	fputc ('+', file);
3270      else
3271	fputc (',', file);
3272      gcc_assert (OBJECT_P (index));
3273      arc_print_operand_address (file, index);
3274      break;
3275    case CONST:
3276      {
3277	rtx c = XEXP (addr, 0);
3278
3279	gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
3280	gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
3281
3282	output_address (XEXP (addr, 0));
3283
3284	break;
3285      }
3286    case PRE_INC :
3287    case PRE_DEC :
3288      /* We shouldn't get here as we've lost the mode of the memory object
3289	 (which says how much to inc/dec by).  */
3290      gcc_unreachable ();
3291      break;
3292    default :
3293      if (flag_pic)
3294	arc_output_pic_addr_const (file, addr, 0);
3295      else
3296	output_addr_const (file, addr);
3297      break;
3298    }
3299}
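
/* For instance (an illustrative sketch, not an exhaustive list), the cases
   above print:
     (reg r1)                        ->  r1
     (plus (reg r1) (const_int 8))   ->  r1,8
     (plus (reg r1) (reg r2))        ->  r1,r2
     small-data (symbol_ref "x")     ->  x@sda
   Any surrounding brackets are emitted by the MEM case of the operand
   printer above, not here.  */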
3300
3301/* Called via walk_stores.  DATA points to a hash table we can use to
3302   establish a unique SYMBOL_REF for each counter, which corresponds to
3303   a caller-callee pair.
3304   X is a store which we want to examine for an UNSPEC_PROF, which
3305   would be an address loaded into a register, or directly used in a MEM.
3306   If we find an UNSPEC_PROF and this is the first time we encounter its
3307   counter, write out a description and a data allocation for a 32 bit counter.
3308   Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance.  */
3309
3310static void
3311write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data)
3312{
3313  rtx *srcp, src;
3314  htab_t htab = (htab_t) data;
3315  rtx *slot;
3316
3317  if (GET_CODE (x) != SET)
3318    return;
3319  srcp = &SET_SRC (x);
3320  if (MEM_P (*srcp))
3321    srcp = &XEXP (*srcp, 0);
3322  else if (MEM_P (SET_DEST (x)))
3323    srcp = &XEXP (SET_DEST (x), 0);
3324  src = *srcp;
3325  if (GET_CODE (src) != CONST)
3326    return;
3327  src = XEXP (src, 0);
3328  if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF)
3329    return;
3330
3331  gcc_assert (XVECLEN (src, 0) == 3);
3332  if (!htab_elements (htab))
3333    {
3334      output_asm_insn (".section .__arc_profile_desc, \"a\"\n"
3335		       "\t.long %0 + 1\n",
3336		       &XVECEXP (src, 0, 0));
3337    }
3338  slot = (rtx *) htab_find_slot (htab, src, INSERT);
3339  if (*slot == HTAB_EMPTY_ENTRY)
3340    {
3341      static int count_nr;
3342      char buf[24];
3343      rtx count;
3344
3345      *slot = src;
3346      sprintf (buf, "__prof_count%d", count_nr++);
3347      count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf));
3348      XVECEXP (src, 0, 2) = count;
3349      output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n"
3350		       "\t.long\t%1\n"
3351		       "\t.section\t.__arc_profile_counters, \"aw\"\n"
3352		       "\t.type\t%o2, @object\n"
3353		       "\t.size\t%o2, 4\n"
3354		       "%o2:\t.zero 4",
3355		       &XVECEXP (src, 0, 0));
3356      *srcp = count;
3357    }
3358  else
3359    *srcp = XVECEXP (*slot, 0, 2);
3360}
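
/* Illustratively (a sketch; "main" and "__prof_count0" are hypothetical
   names), the first counter seen in the translation unit opens the
   descriptor section with

	.section	.__arc_profile_desc, "a"
	.long	main + 1

   and each newly encountered counter then emits roughly

	.section	.__arc_profile_desc, "a"
	.long	<callee operand>
	.section	.__arc_profile_counters, "aw"
	.type	__prof_count0, @object
	.size	__prof_count0, 4
   __prof_count0:	.zero 4

   while already-known counters merely have their SYMBOL_REF reused.  */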
3361
3362/* Hash function for UNSPEC_PROF htab.  Use both the caller's name and
3363   the callee's name (if known).  */
3364
3365static hashval_t
3366unspec_prof_hash (const void *x)
3367{
3368  const_rtx u = (const_rtx) x;
3369  const_rtx s1 = XVECEXP (u, 0, 1);
3370
3371  return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0))
3372	  ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0));
3373}
3374
3375/* Equality function for UNSPEC_PROF htab.  Two pieces of UNSPEC_PROF rtl
3376   shall refer to the same counter if both caller name and callee rtl
3377   are identical.  */
3378
3379static int
3380unspec_prof_htab_eq (const void *x, const void *y)
3381{
3382  const_rtx u0 = (const_rtx) x;
3383  const_rtx u1 = (const_rtx) y;
3384  const_rtx s01 = XVECEXP (u0, 0, 1);
3385  const_rtx s11 = XVECEXP (u1, 0, 1);
3386
3387  return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
3388		   XSTR (XVECEXP (u1, 0, 0), 0))
3389	  && rtx_equal_p (s01, s11));
3390}
3391
3392/* Conditional execution support.
3393
3394   This is based on the ARM port but for now is much simpler.
3395
3396   A finite state machine takes care of noticing whether or not instructions
3397   can be conditionally executed, and thus decrease execution time and code
3398   size by deleting branch instructions.  The fsm is controlled by
3399   arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
3400   actions of PRINT_OPERAND.  The patterns in the .md file for the branch
3401   insns also have a hand in this.  */
3402/* The way we leave dealing with non-annulled or annul-false delay slot
3403   insns to the consumer is awkward.  */
3404
3405/* The state of the fsm controlling condition codes are:
3406   0: normal, do nothing special
3407   1: don't output this insn
3408   2: don't output this insn
3409   3: make insns conditional
3410   4: make insns conditional
3411   5: make insn conditional (only for outputting annulled delay slot insns)
3412
3413   special value for cfun->machine->uid_ccfsm_state:
3414   6: return with but one insn before it since function start / call
3415
3416   State transitions (state->state by whom, under what condition):
3417   0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
3418          some instructions.
3419   0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
3420          by zero or more non-jump insns and an unconditional branch with
3421	  the same target label as the condbranch.
3422   1 -> 3 branch patterns, after having not output the conditional branch
3423   2 -> 4 branch patterns, after having not output the conditional branch
3424   0 -> 5 branch patterns, for annulled delay slot insn.
3425   3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
3426          (the target label has CODE_LABEL_NUMBER equal to
3427	  arc_ccfsm_target_label).
3428   4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
3429   3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
3430   5 -> 0 when outputting the delay slot insn
3431
3432   If the jump clobbers the conditions then we use states 2 and 4.
3433
3434   A similar thing can be done with conditional return insns.
3435
3436   We also handle separating branches from sets of the condition code.
3437   This is done here because knowledge of the ccfsm state is required;
3438   we may not be outputting the branch.  */
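
/* A sketch of the transformation this enables (illustrative only; register
   names and mnemonics are chosen for exposition, not taken from real
   compiler output):

	cmp    r0,0                     cmp    r0,0
	beq    @.L1                     ; branch not output (0 -> 1 -> 3)
	add    r2,r2,1        =>        add.ne r2,r2,1
	mov    r3,r2                    mov.ne r3,r2
   .L1:                            .L1:

   The conditional branch is deleted and the insns it skipped are instead
   predicated on the inverse of its condition.  */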
3439
3440/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
3441   before letting final output INSN.  */
3442
3443static void
3444arc_ccfsm_advance (rtx_insn *insn, struct arc_ccfsm *state)
3445{
3446  /* BODY will hold the body of INSN.  */
3447  register rtx body;
3448
3449  /* This will be 1 if trying to repeat the trick (ie: do the `else' part of
3450     an if/then/else), and things need to be reversed.  */
3451  int reverse = 0;
3452
3453  /* If we start with a return insn, we only succeed if we find another one.  */
3454  int seeking_return = 0;
3455
3456  /* START_INSN will hold the insn from where we start looking.  This is the
3457     first insn after the following code_label if REVERSE is true.  */
3458  rtx_insn *start_insn = insn;
3459
3460  /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
3461     since they don't rely on a cmp preceding them.  */
3462  enum attr_type jump_insn_type;
3463
3464  /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
3465     We can't do this in macro FINAL_PRESCAN_INSN because it's called from
3466     final_scan_insn which has `optimize' as a local.  */
3467  if (optimize < 2 || TARGET_NO_COND_EXEC)
3468    return;
3469
3470  /* Ignore notes and labels.  */
3471  if (!INSN_P (insn))
3472    return;
3473  body = PATTERN (insn);
3474  /* If in state 4, check if the target branch is reached, in order to
3475     change back to state 0.  */
3476  if (state->state == 4)
3477    {
3478      if (insn == state->target_insn)
3479	{
3480	  state->target_insn = NULL;
3481	  state->state = 0;
3482	}
3483      return;
3484    }
3485
3486  /* If in state 3, it is possible to repeat the trick, if this insn is an
3487     unconditional branch to a label, and immediately following this branch
3488     is the previous target label which is only used once, and the label this
3489     branch jumps to is not too far off.  Or in other words "we've done the
3490     `then' part, see if we can do the `else' part."  */
3491  if (state->state == 3)
3492    {
3493      if (simplejump_p (insn))
3494	{
3495	  start_insn = next_nonnote_insn (start_insn);
3496	  if (GET_CODE (start_insn) == BARRIER)
3497	    {
3498	      /* ??? Isn't this always a barrier?  */
3499	      start_insn = next_nonnote_insn (start_insn);
3500	    }
3501	  if (GET_CODE (start_insn) == CODE_LABEL
3502	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3503	      && LABEL_NUSES (start_insn) == 1)
3504	    reverse = TRUE;
3505	  else
3506	    return;
3507	}
3508      else if (GET_CODE (body) == SIMPLE_RETURN)
3509	{
3510	  start_insn = next_nonnote_insn (start_insn);
3511	  if (GET_CODE (start_insn) == BARRIER)
3512	    start_insn = next_nonnote_insn (start_insn);
3513	  if (GET_CODE (start_insn) == CODE_LABEL
3514	      && CODE_LABEL_NUMBER (start_insn) == state->target_label
3515	      && LABEL_NUSES (start_insn) == 1)
3516	    {
3517	      reverse = TRUE;
3518	      seeking_return = 1;
3519	    }
3520	  else
3521	    return;
3522	}
3523      else
3524	return;
3525    }
3526
3527  if (GET_CODE (insn) != JUMP_INSN
3528      || GET_CODE (PATTERN (insn)) == ADDR_VEC
3529      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3530    return;
3531
3532 /* We can't predicate BRCC or loop ends.
3533    Also, when generating PIC code, and considering a medium range call,
3534    we can't predicate the call.  */
3535  jump_insn_type = get_attr_type (insn);
3536  if (jump_insn_type == TYPE_BRCC
3537      || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
3538      || jump_insn_type == TYPE_LOOP_END
3539      || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn)))
3540    return;
3541
3542  /* This jump might be paralleled with a clobber of the condition codes;
3543     the jump should always come first.  */
3544  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
3545    body = XVECEXP (body, 0, 0);
3546
3547  if (reverse
3548      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
3549	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
3550    {
3551      int insns_skipped = 0, fail = FALSE, succeed = FALSE;
3552      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
3553      int then_not_else = TRUE;
3554      /* Nonzero if next insn must be the target label.  */
3555      int next_must_be_target_label_p;
3556      rtx_insn *this_insn = start_insn;
3557      rtx label = 0;
3558
3559      /* Register the insn jumped to.  */
3560      if (reverse)
3561	{
3562	  if (!seeking_return)
3563	    label = XEXP (SET_SRC (body), 0);
3564	}
3565      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
3566	label = XEXP (XEXP (SET_SRC (body), 1), 0);
3567      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
3568	{
3569	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
3570	  then_not_else = FALSE;
3571	}
3572      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN)
3573	seeking_return = 1;
3574      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN)
3575	{
3576	  seeking_return = 1;
3577	  then_not_else = FALSE;
3578	}
3579      else
3580	gcc_unreachable ();
3581
3582      /* If this is a non-annulled branch with a delay slot, there is
3583	 no need to conditionalize the delay slot.  */
3584      if (NEXT_INSN (PREV_INSN (insn)) != insn
3585	  && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn))
3586	{
3587	  this_insn = NEXT_INSN (this_insn);
3588	  gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn)))
3589		      == NEXT_INSN (this_insn));
3590	}
3591      /* See how many insns this branch skips, and what kind of insns.  If all
3592	 insns are okay, and the label or unconditional branch to the same
3593	 label is not too far away, succeed.  */
3594      for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
3595	   !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
3596	   insns_skipped++)
3597	{
3598	  rtx scanbody;
3599
3600	  this_insn = next_nonnote_insn (this_insn);
3601	  if (!this_insn)
3602	    break;
3603
3604	  if (next_must_be_target_label_p)
3605	    {
3606	      if (GET_CODE (this_insn) == BARRIER)
3607		continue;
3608	      if (GET_CODE (this_insn) == CODE_LABEL
3609		  && this_insn == label)
3610		{
3611		  state->state = 1;
3612		  succeed = TRUE;
3613		}
3614	      else
3615		fail = TRUE;
3616	      break;
3617	    }
3618
3619	  scanbody = PATTERN (this_insn);
3620
3621	  switch (GET_CODE (this_insn))
3622	    {
3623	    case CODE_LABEL:
3624	      /* Succeed if it is the target label, otherwise fail since
3625		 control falls in from somewhere else.  */
3626	      if (this_insn == label)
3627		{
3628		  state->state = 1;
3629		  succeed = TRUE;
3630		}
3631	      else
3632		fail = TRUE;
3633	      break;
3634
3635	    case BARRIER:
3636	      /* Succeed if the following insn is the target label.
3637		 Otherwise fail.
3638		 If return insns are used then the last insn in a function
3639		 will be a barrier.  */
3640	      next_must_be_target_label_p = TRUE;
3641	      break;
3642
3643	    case CALL_INSN:
3644	      /* Can handle a call insn if there are no insns after it.
3645		 I.e. the next "insn" is the target label.  We don't have to
3646		 worry about delay slots, as such insns are SEQUENCEs inside
3647		 INSNs.  ??? It is possible to handle such insns though.  */
3648	      if (get_attr_cond (this_insn) == COND_CANUSE)
3649		next_must_be_target_label_p = TRUE;
3650	      else
3651		fail = TRUE;
3652	      break;
3653
3654	    case JUMP_INSN:
3655	      /* If this is an unconditional branch to the same label, succeed.
3656		 If it is to another label, do nothing.  If it is conditional,
3657		 fail.  */
3658	      /* ??? Probably, the test for the SET and the PC are
3659		 unnecessary.  */
3660
3661	      if (GET_CODE (scanbody) == SET
3662		  && GET_CODE (SET_DEST (scanbody)) == PC)
3663		{
3664		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
3665		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
3666		    {
3667		      state->state = 2;
3668		      succeed = TRUE;
3669		    }
3670		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
3671		    fail = TRUE;
3672		  else if (get_attr_cond (this_insn) != COND_CANUSE)
3673		    fail = TRUE;
3674		}
3675	      else if (GET_CODE (scanbody) == SIMPLE_RETURN
3676		       && seeking_return)
3677		{
3678		  state->state = 2;
3679		  succeed = TRUE;
3680		}
3681	      else if (GET_CODE (scanbody) == PARALLEL)
3682		{
3683		  if (get_attr_cond (this_insn) != COND_CANUSE)
3684		    fail = TRUE;
3685		}
3686	      break;
3687
3688	    case INSN:
3689	      /* We can only do this with insns that can use the condition
3690		 codes (and don't set them).  */
3691	      if (GET_CODE (scanbody) == SET
3692		  || GET_CODE (scanbody) == PARALLEL)
3693		{
3694		  if (get_attr_cond (this_insn) != COND_CANUSE)
3695		    fail = TRUE;
3696		}
3697	      /* We can't handle other insns like sequences.  */
3698	      else
3699		fail = TRUE;
3700	      break;
3701
3702	    default:
3703	      break;
3704	    }
3705	}
3706
3707      if (succeed)
3708	{
3709	  if ((!seeking_return) && (state->state == 1 || reverse))
3710	    state->target_label = CODE_LABEL_NUMBER (label);
3711	  else if (seeking_return || state->state == 2)
3712	    {
3713	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
3714		{
3715		  this_insn = next_nonnote_insn (this_insn);
3716
3717		  gcc_assert (!this_insn ||
3718			      (GET_CODE (this_insn) != BARRIER
3719			       && GET_CODE (this_insn) != CODE_LABEL));
3720		}
3721	      if (!this_insn)
3722		{
3723		  /* Oh dear!  We ran off the end; give up.  */
3724		  extract_insn_cached (insn);
3725		  state->state = 0;
3726		  state->target_insn = NULL;
3727		  return;
3728		}
3729	      state->target_insn = this_insn;
3730	    }
3731	  else
3732	    gcc_unreachable ();
3733
3734	  /* If REVERSE is true, the recorded condition code (state->cc) needs
3735	     to be inverted from what it was.  */
3736	  if (!reverse)
3737	    {
3738	      state->cond = XEXP (SET_SRC (body), 0);
3739	      state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
3740	    }
3741
3742	  if (reverse || then_not_else)
3743	    state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
3744	}
3745
3746      /* Restore recog_operand.  Getting the attributes of other insns can
3747	 destroy this array, but final.c assumes that it remains intact
3748	 across this call; since the insn has been recognized already, we
3749	 call insn_extract directly.  */
3750      extract_insn_cached (insn);
3751    }
3752}
3753
3754/* Record that we are currently outputting label NUM with prefix PREFIX.
3755   If it's the label we're looking for, reset the ccfsm machinery.
3756
3757   Called from ASM_OUTPUT_INTERNAL_LABEL.  */
3758
3759static void
3760arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
3761{
3762  if (state->state == 3 && state->target_label == num
3763      && !strcmp (prefix, "L"))
3764    {
3765      state->state = 0;
3766      state->target_insn = NULL;
3767    }
3768}
3769
3770/* We are considering a conditional branch with the condition COND.
3771   Check if we want to conditionalize a delay slot insn, and if so modify
3772   the ccfsm state accordingly.
3773   REVERSE says branch will branch when the condition is false.  */
3774void
3775arc_ccfsm_record_condition (rtx cond, bool reverse, rtx_insn *jump,
3776			    struct arc_ccfsm *state)
3777{
3778  rtx_insn *seq_insn = NEXT_INSN (PREV_INSN (jump));
3779  if (!state)
3780    state = &arc_ccfsm_current;
3781
3782  gcc_assert (state->state == 0);
3783  if (seq_insn != jump)
3784    {
3785      rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
3786
3787      if (!as_a<rtx_insn *> (insn)->deleted ()
3788	  && INSN_ANNULLED_BRANCH_P (jump)
3789	  && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
3790	{
3791	  state->cond = cond;
3792	  state->cc = get_arc_condition_code (cond);
3793	  if (!reverse)
3794	    arc_ccfsm_current.cc
3795	      = ARC_INVERSE_CONDITION_CODE (state->cc);
3796	  rtx pat = PATTERN (insn);
3797	  if (GET_CODE (pat) == COND_EXEC)
3798	    gcc_assert ((INSN_FROM_TARGET_P (insn)
3799			 ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc)
3800			== get_arc_condition_code (XEXP (pat, 0)));
3801	  else
3802	    state->state = 5;
3803	}
3804    }
3805}
3806
3807/* Update *STATE as we would when we emit INSN.  */
3808
3809static void
3810arc_ccfsm_post_advance (rtx_insn *insn, struct arc_ccfsm *state)
3811{
3812  enum attr_type type;
3813
3814  if (LABEL_P (insn))
3815    arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
3816  else if (JUMP_P (insn)
3817	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
3818	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
3819	   && ((type = get_attr_type (insn)) == TYPE_BRANCH
3820	       || (type == TYPE_UNCOND_BRANCH
3821		   /* ??? Maybe should also handle TYPE_RETURN here,
3822		      but we don't have a testcase for that.  */
3823		   && ARC_CCFSM_BRANCH_DELETED_P (state))))
3824    {
3825      if (ARC_CCFSM_BRANCH_DELETED_P (state))
3826	ARC_CCFSM_RECORD_BRANCH_DELETED (state);
3827      else
3828	{
3829	  rtx src = SET_SRC (PATTERN (insn));
3830	  arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
3831				      insn, state);
3832	}
3833    }
3834  else if (arc_ccfsm_current.state == 5)
3835    arc_ccfsm_current.state = 0;
3836}
3837
3838/* Return true if the current insn, which is a conditional branch, is to be
3839   deleted.  */
3840
3841bool
3842arc_ccfsm_branch_deleted_p (void)
3843{
3844  return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
3845}
3846
3847/* Record that a branch isn't output because subsequent insns can be
3848   conditionalized.  */
3849
3850void
3851arc_ccfsm_record_branch_deleted (void)
3852{
3853  ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
3854}
3855
3856/* During insn output, indicate if the current insn is predicated.  */
3857
3858bool
3859arc_ccfsm_cond_exec_p (void)
3860{
3861  return (cfun->machine->prescan_initialized
3862	  && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
3863}
3864
3865/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
3866   and look inside SEQUENCEs.  */
3867
3868static rtx_insn *
3869arc_next_active_insn (rtx_insn *insn, struct arc_ccfsm *statep)
3870{
3871  rtx pat;
3872
3873  do
3874    {
3875      if (statep)
3876	arc_ccfsm_post_advance (insn, statep);
3877      insn = NEXT_INSN (insn);
3878      if (!insn || BARRIER_P (insn))
3879	return NULL;
3880      if (statep)
3881	arc_ccfsm_advance (insn, statep);
3882    }
3883  while (NOTE_P (insn)
3884	 || (cfun->machine->arc_reorg_started
3885	     && LABEL_P (insn) && !label_to_alignment (insn))
3886	 || (NONJUMP_INSN_P (insn)
3887	     && (GET_CODE (PATTERN (insn)) == USE
3888		 || GET_CODE (PATTERN (insn)) == CLOBBER)));
3889  if (!LABEL_P (insn))
3890    {
3891      gcc_assert (INSN_P (insn));
3892      pat = PATTERN (insn);
3893      if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
3894	return NULL;
3895      if (GET_CODE (pat) == SEQUENCE)
3896	return as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
3897    }
3898  return insn;
3899}
3900
3901/* When deciding if an insn should be output short, we want to know something
3902   about the following insns:
3903   - if another insn follows which we know we can output as a short insn
3904     before an alignment-sensitive point, we can output this insn short:
3905     the decision about the eventual alignment can be postponed.
3906   - if a to-be-aligned label comes next, we should output this insn such
3907     as to get / preserve 4-byte alignment.
3908   - if a likely branch without delay slot insn, or a call with an immediately
3909     following short insn comes next, we should output this insn such as to
3910     get / preserve 2 mod 4 unalignment.
3911   - do the same for a not completely unlikely branch with a short insn
3912     following before any other branch / label.
3913   - in order to decide if we are actually looking at a branch, we need to
3914     call arc_ccfsm_advance.
3915   - in order to decide if we are looking at a short insn, we should know
3916     if it is conditionalized.  To a first order of approximation this is
3917     the case if the state from arc_ccfsm_advance from before this insn
3918     indicates the insn is conditionalized.  However, a further refinement
3919     could be to not conditionalize an insn if the destination register(s)
3920     is/are dead in the non-executed case.  */
3921/* Return non-zero if INSN should be output as a short insn.  UNALIGN is
3922   zero if the current insn is aligned to a 4-byte boundary, two otherwise.
3923   If CHECK_ATTR is greater than 0, check the iscompact attribute first.  */
3924
3925int
3926arc_verify_short (rtx_insn *insn, int, int check_attr)
3927{
3928  enum attr_iscompact iscompact;
3929  struct machine_function *machine;
3930
3931  if (check_attr > 0)
3932    {
3933      iscompact = get_attr_iscompact (insn);
3934      if (iscompact == ISCOMPACT_FALSE)
3935	return 0;
3936    }
3937  machine = cfun->machine;
3938
3939  if (machine->force_short_suffix >= 0)
3940    return machine->force_short_suffix;
3941
3942  return (get_attr_length (insn) & 2) != 0;
3943}
3944
3945/* When outputting an instruction (alternative) that can potentially be short,
3946   output the short suffix if the insn is in fact short, and update
3947   cfun->machine->unalign accordingly.  */
3948
3949static void
3950output_short_suffix (FILE *file)
3951{
3952  rtx_insn *insn = current_output_insn;
3953
3954  if (arc_verify_short (insn, cfun->machine->unalign, 1))
3955    {
3956      fprintf (file, "_s");
3957      cfun->machine->unalign ^= 2;
3958    }
3959  /* Restore recog_operand.  */
3960  extract_insn_cached (insn);
3961}
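
/* For example (illustrative): an output routine that has just printed "add"
   for a potentially compact alternative calls this next; if the insn really
   is short, "_s" is appended (giving "add_s") and the insn's 2-byte length
   toggles cfun->machine->unalign.  */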
3962
3963/* Implement FINAL_PRESCAN_INSN.  */
3964
3965void
3966arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
3967			int noperands ATTRIBUTE_UNUSED)
3968{
3969  if (TARGET_DUMPISIZE)
3970    fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
3971
3972  /* Output a nop if necessary to prevent a hazard.
3973     Don't do this for delay slots: inserting a nop would
3974     alter semantics, and the only time we would find a hazard is for a
3975     call function result - and in that case, the hazard is spurious to
3976     start with.  */
3977  if (PREV_INSN (insn)
3978      && PREV_INSN (NEXT_INSN (insn)) == insn
3979      && arc_hazard (prev_real_insn (insn), insn))
3980    {
3981      current_output_insn =
3982	emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
3983      final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
3984      current_output_insn = insn;
3985    }
3986  /* Restore extraction data which might have been clobbered by arc_hazard.  */
3987  extract_constrain_insn_cached (insn);
3988
3989  if (!cfun->machine->prescan_initialized)
3990    {
3991      /* Clear lingering state from branch shortening.  */
3992      memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
3993      cfun->machine->prescan_initialized = 1;
3994    }
3995  arc_ccfsm_advance (insn, &arc_ccfsm_current);
3996
3997  cfun->machine->size_reason = 0;
3998}
3999
4000/* Given FROM and TO register numbers, say whether this elimination is allowed.
4001   Frame pointer elimination is automatically handled.
4002
4003   All eliminations are permissible. If we need a frame
4004   pointer, we must eliminate ARG_POINTER_REGNUM into
4005   FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4006
4007static bool
4008arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
4009{
4010  return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required ();
4011}
4012
4013/* Define the offset between two registers, one to be eliminated, and
4014   the other its replacement, at the start of a routine.  */
4015
4016int
4017arc_initial_elimination_offset (int from, int to)
4018{
4019  if (! cfun->machine->frame_info.initialized)
4020     arc_compute_frame_size (get_frame_size ());
4021
4022  if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4023    {
4024      return (cfun->machine->frame_info.extra_size
4025	      + cfun->machine->frame_info.reg_size);
4026    }
4027
4028  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4029    {
4030      return (cfun->machine->frame_info.total_size
4031	      - cfun->machine->frame_info.pretend_size);
4032    }
4033
4034  if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
4035    {
4036      return (cfun->machine->frame_info.total_size
4037	      - (cfun->machine->frame_info.pretend_size
4038	      + cfun->machine->frame_info.extra_size
4039	      + cfun->machine->frame_info.reg_size));
4040    }
4041
4042  gcc_unreachable ();
4043}
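
/* A sketch of the frame layout implied by the offsets above (illustrative;
   higher addresses towards the top, the stack grows downwards):

	pretend / overflow args
					<- arg pointer (AP)
	extra_size + reg_size (return address, saved regs, ...)
					<- frame pointer (FP)
	locals, spills, outgoing args
					<- stack pointer (SP)

   so AP = FP + extra_size + reg_size, AP = SP + total_size - pretend_size,
   and FP = SP + total_size - (pretend_size + extra_size + reg_size).  */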
4044
4045static bool
4046arc_frame_pointer_required (void)
4047{
4048 return cfun->calls_alloca;
4049}
4050
4051
4052/* Return the destination address of a branch.  */
4053
4054int
4055branch_dest (rtx branch)
4056{
4057  rtx pat = PATTERN (branch);
4058  rtx dest = (GET_CODE (pat) == PARALLEL
4059	      ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
4060  int dest_uid;
4061
4062  if (GET_CODE (dest) == IF_THEN_ELSE)
4063    dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
4064
4065  dest = XEXP (dest, 0);
4066  dest_uid = INSN_UID (dest);
4067
4068  return INSN_ADDRESSES (dest_uid);
4069}
4070
4071
4072/* Implement TARGET_ENCODE_SECTION_INFO hook.  */
4073
4074static void
4075arc_encode_section_info (tree decl, rtx rtl, int first)
4076{
4077  /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION.
4078     This clears machine specific flags, so has to come first.  */
4079  default_encode_section_info (decl, rtl, first);
4080
4081  /* Check if it is a function, and whether it has the
4082     [long/medium/short]_call attribute specified.  */
4083  if (TREE_CODE (decl) == FUNCTION_DECL)
4084    {
4085      rtx symbol = XEXP (rtl, 0);
4086      int flags = SYMBOL_REF_FLAGS (symbol);
4087
4088      tree attr = (TREE_TYPE (decl) != error_mark_node
4089		   ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
4090      tree long_call_attr = lookup_attribute ("long_call", attr);
4091      tree medium_call_attr = lookup_attribute ("medium_call", attr);
4092      tree short_call_attr = lookup_attribute ("short_call", attr);
4093
4094      if (long_call_attr != NULL_TREE)
4095	flags |= SYMBOL_FLAG_LONG_CALL;
4096      else if (medium_call_attr != NULL_TREE)
4097	flags |= SYMBOL_FLAG_MEDIUM_CALL;
4098      else if (short_call_attr != NULL_TREE)
4099	flags |= SYMBOL_FLAG_SHORT_CALL;
4100
4101      SYMBOL_REF_FLAGS (symbol) = flags;
4102    }
4103}
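
/* For example (illustrative), a declaration such as

     void far_away (void) __attribute__ ((long_call));

   causes SYMBOL_FLAG_LONG_CALL to be set on far_away's SYMBOL_REF here,
   which later code can consult when choosing a call sequence.  */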
4104
4105/* This is how to output a definition of an internal numbered label where
4106   PREFIX is the class of label and NUM is the number within the class.  */
4107
4108static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
4109{
4110  if (cfun)
4111    arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
4112  default_internal_label (stream, prefix, labelno);
4113}
4114
4115/* Set the cpu type and print out other fancy things,
4116   at the top of the file.  */
4117
4118static void arc_file_start (void)
4119{
4120  default_file_start ();
4121  fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
4122}
4123
4124/* Cost functions.  */
4125
4126/* Compute a (partial) cost for rtx X.  Return true if the complete
4127   cost has been computed, and false if subexpressions should be
4128   scanned.  In either case, *TOTAL contains the cost result.  */
4129
4130static bool
4131arc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
4132	       int *total, bool speed)
4133{
4134  switch (code)
4135    {
4136      /* Small integers are as cheap as registers.  */
4137    case CONST_INT:
4138      {
4139	bool nolimm = false; /* Can we do without long immediate?  */
4140	bool fast = false; /* Is the result available immediately?  */
4141	bool condexec = false; /* Does this allow conditional execution?  */
4142	bool compact = false; /* Is a 16 bit opcode available?  */
4143	/* CONDEXEC also implies that we can have an unconditional
4144	   3-address operation.  */
4145
4146	nolimm = compact = condexec = false;
4147	if (UNSIGNED_INT6 (INTVAL (x)))
4148	  nolimm = condexec = compact = true;
4149	else
4150	  {
4151	    if (SMALL_INT (INTVAL (x)))
4152	      nolimm = fast = true;
4153	    switch (outer_code)
4154	      {
4155	      case AND: /* bclr, bmsk, ext[bw] */
4156		if (satisfies_constraint_Ccp (x) /* bclr */
4157		    || satisfies_constraint_C1p (x) /* bmsk */)
4158		  nolimm = fast = condexec = compact = true;
4159		break;
4160	      case IOR: /* bset */
4161		if (satisfies_constraint_C0p (x)) /* bset */
4162		  nolimm = fast = condexec = compact = true;
4163		break;
4164	      case XOR:
4165		if (satisfies_constraint_C0p (x)) /* bxor */
4166		  nolimm = fast = condexec = true;
4167		break;
4168	      case SET:
4169		if (satisfies_constraint_Crr (x)) /* ror b,u6 */
4170		  nolimm = true;
4171	      default:
4172		break;
4173	      }
4174	  }
4175	/* FIXME: Add target options to attach a small cost if
4176	   condexec / compact is not true.  */
4177	if (nolimm)
4178	  {
4179	    *total = 0;
4180	    return true;
4181	  }
4182      }
4183      /* FALLTHRU */
4184
4185      /*  4 byte values can be fetched as immediate constants -
4186	  let's give that the cost of an extra insn.  */
4187    case CONST:
4188    case LABEL_REF:
4189    case SYMBOL_REF:
4190      *total = COSTS_N_INSNS (1);
4191      return true;
4192
4193    case CONST_DOUBLE:
4194      {
4195	rtx high, low;
4196
4197	if (TARGET_DPFP)
4198	  {
4199	    *total = COSTS_N_INSNS (1);
4200	    return true;
4201	  }
4202	/* FIXME: correct the order of high,low */
4203	split_double (x, &high, &low);
4204	*total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
4205				+ !SMALL_INT (INTVAL (low)));
4206	return true;
4207      }
4208
4209    /* Encourage synth_mult to find a synthetic multiply when reasonable.
4210       If we need more than 12 insns to do a multiply, then go out-of-line,
4211       since the call overhead will be < 10% of the cost of the multiply.  */
4212    case ASHIFT:
4213    case ASHIFTRT:
4214    case LSHIFTRT:
4215      if (TARGET_BARREL_SHIFTER)
4216	{
4217	  /* If we want to shift a constant, we need a LIMM.  */
4218	  /* ??? when the optimizers want to know if a constant should be
4219	     hoisted, they ask for the cost of the constant.  OUTER_CODE is
4220	     insufficient context for shifts since we don't know which operand
4221	     we are looking at.  */
4222	  if (CONSTANT_P (XEXP (x, 0)))
4223	    {
4224	      *total += (COSTS_N_INSNS (2)
4225			 + rtx_cost (XEXP (x, 1), (enum rtx_code) code, 0, speed));
4226	      return true;
4227	    }
4228	  *total = COSTS_N_INSNS (1);
4229	}
4230      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4231	*total = COSTS_N_INSNS (16);
4232      else
4233	{
4234	  *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
4235	  /* ??? want_to_gcse_p can throw negative shift counts at us,
4236	     and then panics when it gets a negative cost as result.
4237	     Seen for gcc.c-torture/compile/20020710-1.c -Os .  */
4238	  if (*total < 0)
4239	    *total = 0;
4240	}
4241      return false;
4242
4243    case DIV:
4244    case UDIV:
4245      if (speed)
4246	*total = COSTS_N_INSNS (30);
4247      else
4248	*total = COSTS_N_INSNS (1);
4249      return false;
4250
4251    case MULT:
4252      if ((TARGET_DPFP && GET_MODE (x) == DFmode))
4253	*total = COSTS_N_INSNS (1);
4254      else if (speed)
4255	*total= arc_multcost;
4256      /* We do not want synth_mult sequences when optimizing
4257	 for size.  */
4258      else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET))
4259	*total = COSTS_N_INSNS (1);
4260      else
4261	*total = COSTS_N_INSNS (2);
4262      return false;
4263    case PLUS:
4264      if (GET_CODE (XEXP (x, 0)) == MULT
4265	  && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
4266	{
4267	  *total += (rtx_cost (XEXP (x, 1), PLUS, 0, speed)
4268		     + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, 1, speed));
4269	  return true;
4270	}
4271      return false;
4272    case MINUS:
4273      if (GET_CODE (XEXP (x, 1)) == MULT
4274	  && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
4275	{
4276	  *total += (rtx_cost (XEXP (x, 0), PLUS, 0, speed)
4277		     + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, 1, speed));
4278	  return true;
4279	}
4280      return false;
4281    case COMPARE:
4282      {
4283	rtx op0 = XEXP (x, 0);
4284	rtx op1 = XEXP (x, 1);
4285
4286	if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
4287	    && XEXP (op0, 1) == const1_rtx)
4288	  {
4289	    /* btst / bbit0 / bbit1:
4290	       Small integers and registers are free; everything else can
4291	       be put in a register.  */
4292	    *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed)
4293		      + rtx_cost (XEXP (op0, 2), SET, 1, speed));
4294	    return true;
4295	  }
4296	if (GET_CODE (op0) == AND && op1 == const0_rtx
4297	    && satisfies_constraint_C1p (XEXP (op0, 1)))
4298	  {
4299	    /* bmsk.f */
4300	    *total = rtx_cost (XEXP (op0, 0), SET, 1, speed);
4301	    return true;
4302	  }
4303	/* add.f  */
4304	if (GET_CODE (op1) == NEG)
4305	  {
4306	    /* op0 might be constant, the inside of op1 is rather
4307	       unlikely to be so.  So swapping the operands might lower
4308	       the cost.  */
4309	    *total = (rtx_cost (op0, PLUS, 1, speed)
4310		      + rtx_cost (XEXP (op1, 0), PLUS, 0, speed));
4311	  }
4312	return false;
4313      }
4314    case EQ: case NE:
4315      if (outer_code == IF_THEN_ELSE
4316	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
4317	  && XEXP (x, 1) == const0_rtx
4318	  && XEXP (XEXP (x, 0), 1) == const1_rtx)
4319	{
4320	  /* btst / bbit0 / bbit1:
4321	     Small integers and registers are free; everything else can
4322	     be put in a register.  */
4323	  rtx op0 = XEXP (x, 0);
4324
4325	  *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed)
4326		    + rtx_cost (XEXP (op0, 2), SET, 1, speed));
4327	  return true;
4328	}
4329      /* Fall through.  */
4330    /* scc_insn expands into two insns.  */
4331    case GTU: case GEU: case LEU:
4332      if (GET_MODE (x) == SImode)
4333	*total += COSTS_N_INSNS (1);
4334      return false;
4335    case LTU: /* might use adc.  */
4336      if (GET_MODE (x) == SImode)
4337	*total += COSTS_N_INSNS (1) - 1;
4338      return false;
4339    default:
4340      return false;
4341    }
4342}
4343
4344/* Return true if ADDR is an address that needs to be expressed as an
4345   explicit sum of pcl + offset.  */
4346
4347bool
4348arc_legitimate_pc_offset_p (rtx addr)
4349{
4350  if (GET_CODE (addr) != CONST)
4351    return false;
4352  addr = XEXP (addr, 0);
4353  if (GET_CODE (addr) == PLUS)
4354    {
4355      if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4356	return false;
4357      addr = XEXP (addr, 0);
4358    }
4359  return (GET_CODE (addr) == UNSPEC
4360	  && XVECLEN (addr, 0) == 1
4361	  && XINT (addr, 1) == ARC_UNSPEC_GOT
4362	  && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF);
4363}
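
/* For instance (a sketch), an address such as

     (const (plus (unspec [(symbol_ref ("foo"))] ARC_UNSPEC_GOT)
		  (const_int 4)))

   satisfies the test above, whereas a bare (symbol_ref ("foo")) does not.  */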
4364
4365/* Return true if ADDR is a valid pic address.
4366   A valid pic address on arc should look like
4367   const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT))  */
4368
4369bool
4370arc_legitimate_pic_addr_p (rtx addr)
4371{
4372  if (GET_CODE (addr) == LABEL_REF)
4373    return true;
4374  if (GET_CODE (addr) != CONST)
4375    return false;
4376
4377  addr = XEXP (addr, 0);
4378
4379
4380  if (GET_CODE (addr) == PLUS)
4381    {
4382      if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
4383	return false;
4384      addr = XEXP (addr, 0);
4385    }
4386
4387  if (GET_CODE (addr) != UNSPEC
4388      || XVECLEN (addr, 0) != 1)
4389    return false;
4390
4391  /* Must be @GOT or @GOTOFF.  */
4392  if (XINT (addr, 1) != ARC_UNSPEC_GOT
4393      && XINT (addr, 1) != ARC_UNSPEC_GOTOFF)
4394    return false;
4395
4396  if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
4397      && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
4398    return false;
4399
4400  return true;
4401}
4402
4403
4404
4405/* Return true if OP contains a symbol reference.  */
4406
4407static bool
4408symbolic_reference_mentioned_p (rtx op)
4409{
4410  register const char *fmt;
4411  register int i;
4412
4413  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4414    return true;
4415
4416  fmt = GET_RTX_FORMAT (GET_CODE (op));
4417  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4418    {
4419      if (fmt[i] == 'E')
4420	{
4421	  register int j;
4422
4423	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4424	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4425	      return true;
4426	}
4427
4428      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4429	return true;
4430    }
4431
4432  return false;
4433}
4434
4435/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec.
4436   If SKIP_LOCAL is true, skip symbols that bind locally.
4437   This is used further down in this file, and, without SKIP_LOCAL,
4438   in the addsi3 / subsi3 expanders when generating PIC code.  */
4439
4440bool
4441arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local)
4442{
4443  register const char *fmt;
4444  register int i;
4445
4446  if (GET_CODE(op) == UNSPEC)
4447    return false;
4448
4449  if (GET_CODE (op) == SYMBOL_REF)
4450    {
4451      tree decl = SYMBOL_REF_DECL (op);
4452      return !skip_local || !decl || !default_binds_local_p (decl);
4453    }
4454
4455  fmt = GET_RTX_FORMAT (GET_CODE (op));
4456  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4457    {
4458      if (fmt[i] == 'E')
4459	{
4460	  register int j;
4461
4462	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4463	    if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j),
4464							skip_local))
4465	      return true;
4466	}
4467
4468      else if (fmt[i] == 'e'
4469	       && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i),
4470							  skip_local))
4471	return true;
4472    }
4473
4474  return false;
4475}
4476
4477/* Legitimize a pic address reference in ORIG.
4478   The return value is the legitimated address.
4479   If OLDX is non-zero, it is the target to assign the address to first.  */
4480
4481rtx
4482arc_legitimize_pic_address (rtx orig, rtx oldx)
4483{
4484  rtx addr = orig;
4485  rtx pat = orig;
4486  rtx base;
4487
4488  if (oldx == orig)
4489    oldx = NULL;
4490
4491  if (GET_CODE (addr) == LABEL_REF)
4492    ; /* Do nothing.  */
4493  else if (GET_CODE (addr) == SYMBOL_REF
4494	   && (CONSTANT_POOL_ADDRESS_P (addr)
4495	       || SYMBOL_REF_LOCAL_P (addr)))
4496    {
4497      /* This symbol may be referenced via a displacement from the PIC
4498	 base address (@GOTOFF).  */
4499
4500      /* FIXME: if we had a way to emit pc-relative adds that don't
4501	 create a GOT entry, we could do without the use of the gp register.  */
4502      crtl->uses_pic_offset_table = 1;
4503      pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
4504      pat = gen_rtx_CONST (Pmode, pat);
4505      pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4506
4507      if (oldx == NULL)
4508	oldx = gen_reg_rtx (Pmode);
4509
4510      if (oldx != 0)
4511	{
4512	  emit_move_insn (oldx, pat);
4513	  pat = oldx;
4514	}
4515
4516    }
4517  else if (GET_CODE (addr) == SYMBOL_REF)
4518    {
4519      /* This symbol must be referenced via a load from the
4520	 Global Offset Table (@GOTPC).  */
4521
4522      pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
4523      pat = gen_rtx_CONST (Pmode, pat);
4524      pat = gen_const_mem (Pmode, pat);
4525
4526      if (oldx == 0)
4527	oldx = gen_reg_rtx (Pmode);
4528
4529      emit_move_insn (oldx, pat);
4530      pat = oldx;
4531    }
4532  else
4533    {
4534      if (GET_CODE (addr) == CONST)
4535	{
4536	  addr = XEXP (addr, 0);
4537	  if (GET_CODE (addr) == UNSPEC)
4538	    {
4539	      /* Check that the unspec is one of the ones we generate?  */
4540	    }
4541	  else
4542	    gcc_assert (GET_CODE (addr) == PLUS);
4543	}
4544
4545      if (GET_CODE (addr) == PLUS)
4546	{
4547	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4548
4549	  /* Check first to see if this is a constant offset from a @GOTOFF
4550	     symbol reference.  */
4551	  if ((GET_CODE (op0) == LABEL_REF
4552	       || (GET_CODE (op0) == SYMBOL_REF
4553		   && (CONSTANT_POOL_ADDRESS_P (op0)
4554		       || SYMBOL_REF_LOCAL_P (op0))))
4555	      && GET_CODE (op1) == CONST_INT)
4556	    {
4557	      /* FIXME: like above, could do without gp reference.  */
4558	      crtl->uses_pic_offset_table = 1;
4559	      pat
4560		= gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF);
4561	      pat = gen_rtx_PLUS (Pmode, pat, op1);
4562	      pat = gen_rtx_CONST (Pmode, pat);
4563	      pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
4564
4565	      if (oldx != 0)
4566		{
4567		  emit_move_insn (oldx, pat);
4568		  pat = oldx;
4569		}
4570	    }
4571	  else
4572	    {
4573	      base = arc_legitimize_pic_address (XEXP (addr, 0), oldx);
4574	      pat  = arc_legitimize_pic_address (XEXP (addr, 1),
4575					     base == oldx ? NULL_RTX : oldx);
4576
4577	      if (GET_CODE (pat) == CONST_INT)
4578		pat = plus_constant (Pmode, base, INTVAL (pat));
4579	      else
4580		{
4581		  if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1)))
4582		    {
4583		      base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0));
4584		      pat = XEXP (pat, 1);
4585		    }
4586		  pat = gen_rtx_PLUS (Pmode, base, pat);
4587		}
4588	    }
4589	}
4590    }
4591
4592 return pat;
4593}
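
/* Illustrative summary (a sketch): for a locally-binding symbol SYM, the
   address computed (and copied into a register) is roughly

     (plus pic_offset_table_rtx (const (unspec [SYM] ARC_UNSPEC_GOTOFF)))

   whereas for other symbols the address is loaded from the GOT via

     (mem (const (unspec [SYM] ARC_UNSPEC_GOT)))  */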
4594
4595/* Output address constant X to FILE, taking PIC into account.  */
4596
4597void
4598arc_output_pic_addr_const (FILE * file, rtx x, int code)
4599{
4600  char buf[256];
4601
4602 restart:
4603  switch (GET_CODE (x))
4604    {
4605    case PC:
4606      if (flag_pic)
4607	putc ('.', file);
4608      else
4609	gcc_unreachable ();
4610      break;
4611
4612    case SYMBOL_REF:
4613      output_addr_const (file, x);
4614
4615      /* Local functions do not get references through the PLT.  */
4616      if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
4617	fputs ("@plt", file);
4618      break;
4619
4620    case LABEL_REF:
4621      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
4622      assemble_name (file, buf);
4623      break;
4624
4625    case CODE_LABEL:
4626      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
4627      assemble_name (file, buf);
4628      break;
4629
4630    case CONST_INT:
4631      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4632      break;
4633
4634    case CONST:
4635      arc_output_pic_addr_const (file, XEXP (x, 0), code);
4636      break;
4637
4638    case CONST_DOUBLE:
4639      if (GET_MODE (x) == VOIDmode)
4640	{
4641	  /* We can use %d if the number is one word and positive.  */
4642	  if (CONST_DOUBLE_HIGH (x))
4643	    fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
4644		     CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
4645	  else if  (CONST_DOUBLE_LOW (x) < 0)
4646	    fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
4647	  else
4648	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
4649	}
4650      else
4651	/* We can't handle floating point constants;
4652	   PRINT_OPERAND must handle them.  */
4653	output_operand_lossage ("floating constant misused");
4654      break;
4655
4656    case PLUS:
4657      /* FIXME: Not needed here.  */
4658      /* Some assemblers need integer constants to appear last (eg masm).  */
4659      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4660	{
4661	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4662	  fprintf (file, "+");
4663	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4664	}
4665      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4666	{
4667	  arc_output_pic_addr_const (file, XEXP (x, 0), code);
4668	  if (INTVAL (XEXP (x, 1)) >= 0)
4669	    fprintf (file, "+");
4670	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4671	}
4672      else
4673	gcc_unreachable();
4674      break;
4675
4676    case MINUS:
4677      /* Avoid outputting things like x-x or x+5-x,
4678	 since some assemblers can't handle that.  */
4679      x = simplify_subtraction (x);
4680      if (GET_CODE (x) != MINUS)
4681	goto restart;
4682
4683      arc_output_pic_addr_const (file, XEXP (x, 0), code);
4684      fprintf (file, "-");
4685      if (GET_CODE (XEXP (x, 1)) == CONST_INT
4686	  && INTVAL (XEXP (x, 1)) < 0)
4687	{
4688	  fprintf (file, "(");
4689	  arc_output_pic_addr_const (file, XEXP (x, 1), code);
4690	  fprintf (file, ")");
4691	}
4692      else
4693	arc_output_pic_addr_const (file, XEXP (x, 1), code);
4694      break;
4695
4696    case ZERO_EXTEND:
4697    case SIGN_EXTEND:
4698      arc_output_pic_addr_const (file, XEXP (x, 0), code);
4699      break;
4700
4701
4702    case UNSPEC:
4703      gcc_assert (XVECLEN (x, 0) == 1);
4704      if (XINT (x, 1) == ARC_UNSPEC_GOT)
4705	fputs ("pcl,", file);
4706      arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
4707      switch (XINT (x, 1))
4708	{
4709	case ARC_UNSPEC_GOT:
4710	  fputs ("@gotpc", file);
4711	  break;
4712	case ARC_UNSPEC_GOTOFF:
4713	  fputs ("@gotoff", file);
4714	  break;
4715	case ARC_UNSPEC_PLT:
4716	  fputs ("@plt", file);
4717	  break;
4718	default:
4719	  output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1));
4720	  break;
4721	}
4722       break;
4723
4724    default:
4725      output_operand_lossage ("invalid expression as operand");
4726    }
4727}
4728
4729#define SYMBOLIC_CONST(X)	\
4730(GET_CODE (X) == SYMBOL_REF						\
4731 || GET_CODE (X) == LABEL_REF						\
4732 || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
4733
4734/* Emit insns to move operands[1] into operands[0].  */
4735
4736void
4737emit_pic_move (rtx *operands, machine_mode)
4738{
4739  rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
4740
4741  if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
4742    operands[1] = force_reg (Pmode, operands[1]);
4743  else
4744    operands[1] = arc_legitimize_pic_address (operands[1], temp);
4745}
4746
4747
4748/* This function returns the number of words, at the beginning of an
4749   argument, that must be put in registers.  The returned value must be
4750   zero for arguments that are passed entirely in registers or that
4751   are entirely pushed on the stack.
4752
4753   On some machines, certain arguments must be passed partially in
4754   registers and partially in memory.  On these machines, typically
4755   the first N words of arguments are passed in registers, and the
4756   rest on the stack.  If a multi-word argument (a `double' or a
4757   structure) crosses that boundary, its first few words must be
4758   passed in registers and the rest must be pushed.  This function
4759   tells the compiler when this occurs, and how many of the words
4760   should go in registers.
4761
4762   `FUNCTION_ARG' for these arguments should return the first register
4763   to be used by the caller for this argument; likewise
4764   `FUNCTION_INCOMING_ARG', for the called function.
4765
4766   The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS.  */
4767
4768/* If REGNO is the least arg reg available then what is the total number of arg
4769   regs available.  */
4770#define GPR_REST_ARG_REGS(REGNO) \
4771  ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 )
4772
4773/* ARC parm regs are contiguous, so the next arg reg is simply one more.  */
4774#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
4775
4776/* Implement TARGET_ARG_PARTIAL_BYTES.  */
4777
4778static int
4779arc_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4780		       tree type, bool named ATTRIBUTE_UNUSED)
4781{
4782  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4783  int bytes = (mode == BLKmode
4784	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4785  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4786  int arg_num = *cum;
4787  int ret;
4788
4789  arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4790  ret = GPR_REST_ARG_REGS (arg_num);
4791
4792  /* ICEd at function.c:2361, and ret is copied to data->partial */
4793  ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
4794
4795  return ret;
4796}
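
/* Worked example (a sketch, assuming the usual ARC convention that the
   first MAX_ARC_PARM_REGS words of arguments go in r0-r7): a DImode
   argument whose first word lands in the last parameter register has
   GPR_REST_ARG_REGS == 1 and words == 2, so 1 * UNITS_PER_WORD == 4 bytes
   are passed in the register and the remaining 4 bytes go on the stack.  */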
4797
4798
4799
4800/* This function is used to control whether a function argument is passed
4801   in a register, and if so, which register.
4802
4803   The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
4804   (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
4805   all of the previous arguments so far passed in registers; MODE, the
4806   machine mode of the argument; TYPE, the data type of the argument
4807   as a tree node or 0 if that is not known (which happens for C
4808   support library functions); and NAMED, which is 1 for an ordinary
4809   argument and 0 for nameless arguments that correspond to `...' in
4810   the called function's prototype.
4811
4812   The returned value should either be a `reg' RTX for the hard
4813   register in which to pass the argument, or zero to pass the
4814   argument on the stack.
4815
4816   For machines like the Vax and 68000, where normally all arguments
4817   are pushed, zero suffices as a definition.
4818
4819   The usual way to make the ANSI library `stdarg.h' work on a machine
4820   where some arguments are usually passed in registers, is to cause
4821   nameless arguments to be passed on the stack instead.  This is done
4822   by making the function return 0 whenever NAMED is 0.
4823
4824   You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
4825   definition of this function to determine if this argument is of a
4826   type that must be passed in the stack.  If `REG_PARM_STACK_SPACE'
4827   is not defined and the function returns non-zero for such an
4828   argument, the compiler will abort.  If `REG_PARM_STACK_SPACE' is
4829   defined, the argument will be computed in the stack and then loaded
4830   into a register.
4831
4832   The function is used to implement macro FUNCTION_ARG.  */
4833/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
4834   and the rest are pushed.  */
4835
4836static rtx
4837arc_function_arg (cumulative_args_t cum_v, machine_mode mode,
4838		  const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED)
4839{
4840  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4841  int arg_num = *cum;
4842  rtx ret;
4843  const char *debstr ATTRIBUTE_UNUSED;
4844
4845  arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
4846  /* Return a marker for use in the call instruction.  */
4847  if (mode == VOIDmode)
4848    {
4849      ret = const0_rtx;
4850      debstr = "<0>";
4851    }
4852  else if (GPR_REST_ARG_REGS (arg_num) > 0)
4853    {
4854      ret = gen_rtx_REG (mode, arg_num);
4855      debstr = reg_names [arg_num];
4856    }
4857  else
4858    {
4859      ret = NULL_RTX;
4860      debstr = "memory";
4861    }
4862  return ret;
4863}
4864
4865/* The function to update the summarizer variable *CUM to advance past
4866   an argument in the argument list.  The values MODE, TYPE and NAMED
4867   describe that argument.  Once this is done, the variable *CUM is
4868   suitable for analyzing the *following* argument with
4869   `FUNCTION_ARG', etc.
4870
4871   This function need not do anything if the argument in question was
4872   passed on the stack.  The compiler knows how to track the amount of
4873   stack space used for arguments without any special help.
4874
4875   The function is used to implement macro FUNCTION_ARG_ADVANCE.  */
4876/* For the ARC: the cum set here is passed on to function_arg where we
4877   look at its value and say which reg to use. Strategy: advance the
4878   regnumber here till we run out of arg regs, then set *cum to last
4879   reg. In function_arg, since *cum > last arg reg we would return 0
4880   and thus the arg will end up on the stack. For straddling args of
4881   course function_arg_partial_nregs will come into play.  */
4882
4883static void
4884arc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4885			  const_tree type, bool named ATTRIBUTE_UNUSED)
4886{
4887  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4888  int bytes = (mode == BLKmode
4889	       ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
4890  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4891  int i;
4892
4893  if (words)
4894    *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
4895  for (i = 0; i < words; i++)
4896    *cum = ARC_NEXT_ARG_REG (*cum);
4898}
4899
4900/* Define how to find the value returned by a function.
4901   VALTYPE is the data type of the value (as a tree).
4902   If the precise function being called is known, FN_DECL_OR_TYPE is its
4903   FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type.  */
4904
4905static rtx
4906arc_function_value (const_tree valtype,
4907		    const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
4908		    bool outgoing ATTRIBUTE_UNUSED)
4909{
4910  machine_mode mode = TYPE_MODE (valtype);
4911  int unsignedp ATTRIBUTE_UNUSED;
4912
4913  unsignedp = TYPE_UNSIGNED (valtype);
4914  if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
4915    PROMOTE_MODE (mode, unsignedp, valtype);
4916  return gen_rtx_REG (mode, 0);
4917}
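
/* For example (illustrative), an "int" result is promoted via PROMOTE_MODE
   and returned in r0, while a "long long" result occupies the r0-r1 pair
   (register 0 in DImode).  */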
4918
4919/* Returns the return address that is used by builtin_return_address.  */
4920
4921rtx
4922arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
4923{
4924  if (count != 0)
4925    return const0_rtx;
4926
4927  return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
4928}
4929
4930/* Nonzero if the constant value X is a legitimate general operand
4931   when generating PIC code.  It is given that flag_pic is on and
4932   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
4933
4934bool
4935arc_legitimate_pic_operand_p (rtx x)
4936{
4937  return !arc_raw_symbolic_reference_mentioned_p (x, true);
4938}
4939
4940/* Determine if a given RTX is a valid constant.  We already know this
4941   satisfies CONSTANT_P.  */
4942
4943bool
4944arc_legitimate_constant_p (machine_mode, rtx x)
4945{
4946  if (!flag_pic)
4947    return true;
4948
4949  switch (GET_CODE (x))
4950    {
4951    case CONST:
4952      x = XEXP (x, 0);
4953
4954      if (GET_CODE (x) == PLUS)
4955	{
4956	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4957	    return false;
4958	  x = XEXP (x, 0);
4959	}
4960
4961      /* Only some unspecs are valid as "constants".  */
4962      if (GET_CODE (x) == UNSPEC)
4963	switch (XINT (x, 1))
4964	  {
4965	  case ARC_UNSPEC_PLT:
4966	  case ARC_UNSPEC_GOTOFF:
4967	  case ARC_UNSPEC_GOT:
4968	  case UNSPEC_PROF:
4969	    return true;
4970
4971	  default:
4972	    gcc_unreachable ();
4973	  }
4974
4975      /* We must have drilled down to a symbol.  */
4976      if (arc_raw_symbolic_reference_mentioned_p (x, false))
4977	return false;
4978
4979      /* Return true.  */
4980      break;
4981
4982    case LABEL_REF:
4983    case SYMBOL_REF:
4984      return false;
4985
4986    default:
4987      break;
4988    }
4989
4990  /* Otherwise we handle everything else in the move patterns.  */
4991  return true;
4992}
4993
4994static bool
4995arc_legitimate_address_p (machine_mode mode, rtx x, bool strict)
4996{
4997  if (RTX_OK_FOR_BASE_P (x, strict))
4998     return true;
4999  if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict))
5000     return true;
5001  if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict))
5002    return true;
5003  if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x))
5004     return true;
5005  if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x)))
5006     return true;
5007  if ((GET_MODE_SIZE (mode) != 16)
5008      && (GET_CODE (x) == SYMBOL_REF
5009	  || GET_CODE (x) == LABEL_REF
5010	  || GET_CODE (x) == CONST))
5011    {
5012      if (!flag_pic || arc_legitimate_pic_addr_p (x))
5013	return true;
5014    }
5015  if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC
5016       || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC)
5017      && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict))
5018    return true;
5019  /* We're restricted here by the `st' insn.  */
5020  if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)
5021      && GET_CODE (XEXP ((x), 1)) == PLUS
5022      && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0))
5023      && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1),
5024				      TARGET_AUTO_MODIFY_REG, strict))
5025    return true;
5026  return false;
5027}
5028
5029/* Return true iff ADDR (a legitimate address expression)
5030   has an effect that depends on the machine mode it is used for.  */
5031
5032static bool
5033arc_mode_dependent_address_p (const_rtx addr, addr_space_t)
5034{
5035  /* SYMBOL_REF is not mode dependent: it is either a small data reference,
5036     which is valid for loads and stores, or a limm offset, which is valid for
5037     loads.  */
5038  /* Scaled indices are scaled by the access mode; likewise for scaled
5039     offsets, which are needed for maximum offset stores.  */
5040  if (GET_CODE (addr) == PLUS
5041      && (GET_CODE (XEXP ((addr), 0)) == MULT
5042	  || (CONST_INT_P (XEXP ((addr), 1))
5043	      && !SMALL_INT (INTVAL (XEXP ((addr), 1))))))
5044    return true;
5045  return false;
5046}
5047
5048/* Determine if it's legal to put X into the constant pool.  */
5049
5050static bool
5051arc_cannot_force_const_mem (machine_mode mode, rtx x)
5052{
5053  return !arc_legitimate_constant_p (mode, x);
5054}
5055
5056
5057/* Generic function to define a builtin.  */
5058#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
5059  do									\
5060    {									\
5061       if (MASK)							\
5062	  add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \
5063    }									\
5064  while (0)
5065
5066
5067static void
5068arc_init_builtins (void)
5069{
5070    tree endlink = void_list_node;
5071
5072    tree void_ftype_void
5073	= build_function_type (void_type_node,
5074			       endlink);
5075
5076    tree int_ftype_int
5077	= build_function_type (integer_type_node,
5078			   tree_cons (NULL_TREE, integer_type_node, endlink));
5079
5080    tree pcvoid_type_node
5081	= build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST));
5082    tree int_ftype_pcvoid_int
5083	= build_function_type (integer_type_node,
5084			   tree_cons (NULL_TREE, pcvoid_type_node,
5085			       tree_cons (NULL_TREE, integer_type_node,
5086				    endlink)));
5087
5088    tree int_ftype_short_int
5089	= build_function_type (integer_type_node,
5090			       tree_cons (NULL_TREE, short_integer_type_node, endlink));
5091
5092    tree void_ftype_int_int
5093	= build_function_type (void_type_node,
5094			       tree_cons (NULL_TREE, integer_type_node,
5095					  tree_cons (NULL_TREE, integer_type_node, endlink)));
5096    tree void_ftype_usint_usint
5097	= build_function_type (void_type_node,
5098			       tree_cons (NULL_TREE, long_unsigned_type_node,
5099					  tree_cons (NULL_TREE, long_unsigned_type_node, endlink)));
5100
5101    tree int_ftype_int_int
5102	= build_function_type (integer_type_node,
5103			       tree_cons (NULL_TREE, integer_type_node,
5104					  tree_cons (NULL_TREE, integer_type_node, endlink)));
5105
5106    tree usint_ftype_usint
5107	= build_function_type (long_unsigned_type_node,
5108			   tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
5109
5110    tree void_ftype_usint
5111	= build_function_type (void_type_node,
5112			   tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
5113
5114    /* Add the builtins.  */
5115    def_mbuiltin (1, "__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP);
5116    def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM);
5117    def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW);
5118    def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP);
5119    def_mbuiltin (TARGET_MUL64_SET, "__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64);
5120    def_mbuiltin (TARGET_MUL64_SET, "__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64);
5121    def_mbuiltin (1, "__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE);
5122    def_mbuiltin (TARGET_ARC700, "__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC);
5123    def_mbuiltin (TARGET_EA_SET, "__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW);
5124    def_mbuiltin (1, "__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK);
5125    def_mbuiltin (1, "__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG);
5126    def_mbuiltin (1, "__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP);
5127    def_mbuiltin (1, "__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI);
5128    def_mbuiltin (1, "__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ);
5129    def_mbuiltin (1, "__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE);
5130    def_mbuiltin (1, "__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR);
5131    def_mbuiltin (1, "__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR);
5132    def_mbuiltin (TARGET_ARC700, "__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S);
5133    def_mbuiltin (TARGET_ARC700, "__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S);
5134    def_mbuiltin (1, "__builtin_arc_aligned", int_ftype_pcvoid_int, ARC_BUILTIN_ALIGNED);
5135
5136    if (TARGET_SIMD_SET)
5137      arc_init_simd_builtins ();
5138}
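
/* User code reaches these builtins through ordinary calls, e.g.
   (illustrative; the -m options tested above must enable the builtin):

     int leading = __builtin_arc_norm (x);
     __builtin_arc_sr (aux_addr, value);

   where X, AUX_ADDR and VALUE are placeholder expressions; the first call
   expands via ARC_BUILTIN_NORM, the second via ARC_BUILTIN_SR.  */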
5139
5140static rtx arc_expand_simd_builtin (tree, rtx, rtx, machine_mode, int);
5141
5142/* Expand an expression EXP that calls a built-in function,
5143   with result going to TARGET if that's convenient
5144   (and in mode MODE if that's convenient).
5145   SUBTARGET may be used as the target for computing one of EXP's operands.
5146   IGNORE is nonzero if the value is to be ignored.  */
5147
5148static rtx
5149arc_expand_builtin (tree exp,
5150		    rtx target,
5151		    rtx subtarget,
5152		    machine_mode mode,
5153		    int ignore)
5154{
5155  tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5156  tree              arg0;
5157  tree              arg1;
5158  rtx               op0;
5159  rtx               op1;
5160  int               fcode = DECL_FUNCTION_CODE (fndecl);
5161  int               icode;
5162  machine_mode mode0;
5163  machine_mode mode1;
5164
5165  if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END)
5166    return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore);
5167
5168  switch (fcode)
5169    {
5170    case ARC_BUILTIN_NOP:
5171      emit_insn (gen_nop ());
5172      return NULL_RTX;
5173
5174    case ARC_BUILTIN_NORM:
5175      icode = CODE_FOR_clrsbsi2;
5176      arg0 = CALL_EXPR_ARG (exp, 0);
5177      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5178      mode0 = insn_data[icode].operand[1].mode;
5179      target = gen_reg_rtx (SImode);
5180
5181      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5182	op0 = copy_to_mode_reg (mode0, op0);
5183
5184      emit_insn (gen_clrsbsi2 (target, op0));
5185      return target;
5186
5187    case ARC_BUILTIN_NORMW:
5188
5189      /* FIXME : This should all be HImode, not SImode.  */
5190      icode = CODE_FOR_normw;
5191      arg0 = CALL_EXPR_ARG (exp, 0);
5192      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5193      mode0 = insn_data[icode].operand[1].mode;
5194      target = gen_reg_rtx (SImode);
5195
5196      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5197	op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0, 0));
5198
5199      emit_insn (gen_normw (target, op0));
5200      return target;
5201
5202    case ARC_BUILTIN_MUL64:
5203      icode = CODE_FOR_mul64;
5204      arg0 = CALL_EXPR_ARG (exp, 0);
5205      arg1 = CALL_EXPR_ARG (exp, 1);
5206      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5207      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5208
5209      mode0 = insn_data[icode].operand[0].mode;
5210      mode1 = insn_data[icode].operand[1].mode;
5211
5212      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5213	op0 = copy_to_mode_reg (mode0, op0);
5214
5215      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5216	op1 = copy_to_mode_reg (mode1, op1);
5217
5218      emit_insn (gen_mul64 (op0, op1));
5219      return NULL_RTX;
5220
5221    case ARC_BUILTIN_MULU64:
5222      icode = CODE_FOR_mulu64;
5223      arg0 = CALL_EXPR_ARG (exp, 0);
5224      arg1 = CALL_EXPR_ARG (exp, 1);
5225      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5226      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5227
5228      mode0 = insn_data[icode].operand[0].mode;
5229      mode1 = insn_data[icode].operand[1].mode;
5230
5231      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5232	op0 = copy_to_mode_reg (mode0, op0);
5233
5234      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5235	op1 = copy_to_mode_reg (mode1, op1);
5236
5237      emit_insn (gen_mulu64 (op0, op1));
5238      return NULL_RTX;
5239
5240    case ARC_BUILTIN_RTIE:
5241      icode = CODE_FOR_rtie;
5242      emit_insn (gen_rtie (const1_rtx));
5243      return NULL_RTX;
5244
5245    case ARC_BUILTIN_SYNC:
5246      icode = CODE_FOR_sync;
5247      emit_insn (gen_sync (const1_rtx));
5248      return NULL_RTX;
5249
5250    case ARC_BUILTIN_SWAP:
5251      icode = CODE_FOR_swap;
5252      arg0 = CALL_EXPR_ARG (exp, 0);
5253      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5254      mode0 = insn_data[icode].operand[1].mode;
5255      target = gen_reg_rtx (SImode);
5256
5257      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
5258	op0 = copy_to_mode_reg (mode0, op0);
5259
5260      emit_insn (gen_swap (target, op0));
5261      return target;
5262
5263    case ARC_BUILTIN_DIVAW:
5264      icode = CODE_FOR_divaw;
5265      arg0 = CALL_EXPR_ARG (exp, 0);
5266      arg1 = CALL_EXPR_ARG (exp, 1);
5267
5268      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5269      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5270      target = gen_reg_rtx (SImode);
5271
5272      mode0 = insn_data[icode].operand[0].mode;
5273      mode1 = insn_data[icode].operand[1].mode;
5274
5275      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5276	op0 = copy_to_mode_reg (mode0, op0);
5277
5278      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
5279	op1 = copy_to_mode_reg (mode1, op1);
5280
5281      emit_insn (gen_divaw (target, op0, op1));
5282      return target;
5283
5284    case ARC_BUILTIN_BRK:
5285      icode = CODE_FOR_brk;
5286      emit_insn (gen_brk (const1_rtx));
5287      return NULL_RTX;
5288
5289    case ARC_BUILTIN_SLEEP:
5290      icode = CODE_FOR_sleep;
5291      arg0 = CALL_EXPR_ARG (exp, 0);
5292
5293      fold (arg0);
5294
5295      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5296      mode0 = insn_data[icode].operand[1].mode;
5297
5298      emit_insn (gen_sleep (op0));
5299      return NULL_RTX;
5300
5301    case ARC_BUILTIN_SWI:
5302      icode = CODE_FOR_swi;
5303      emit_insn (gen_swi (const1_rtx));
5304      return NULL_RTX;
5305
5306    case ARC_BUILTIN_FLAG:
5307      icode = CODE_FOR_flag;
5308      arg0 = CALL_EXPR_ARG (exp, 0);
5309      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5310      mode0 = insn_data[icode].operand[0].mode;
5311
5312      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
5313	op0 = copy_to_mode_reg (mode0, op0);
5314
5315      emit_insn (gen_flag (op0));
5316      return NULL_RTX;
5317
5318    case ARC_BUILTIN_CORE_READ:
5319      icode = CODE_FOR_core_read;
5320      arg0 = CALL_EXPR_ARG (exp, 0);
5321      target = gen_reg_rtx (SImode);
5322
5323      fold (arg0);
5324
5325      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5326      mode0 = insn_data[icode].operand[1].mode;
5327
5328      emit_insn (gen_core_read (target, op0));
5329      return target;
5330
5331    case ARC_BUILTIN_CORE_WRITE:
5332      icode = CODE_FOR_core_write;
5333      arg0 = CALL_EXPR_ARG (exp, 0);
5334      arg1 = CALL_EXPR_ARG (exp, 1);
5335
5336      fold (arg1);
5337
5338      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5339      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5340
5341      mode0 = insn_data[icode].operand[0].mode;
5342      mode1 = insn_data[icode].operand[1].mode;
5343
5344      emit_insn (gen_core_write (op0, op1));
5345      return NULL_RTX;
5346
5347    case ARC_BUILTIN_LR:
5348      icode = CODE_FOR_lr;
5349      arg0 = CALL_EXPR_ARG (exp, 0);
5350      target = gen_reg_rtx (SImode);
5351
5352      fold (arg0);
5353
5354      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5355      mode0 = insn_data[icode].operand[1].mode;
5356
5357      emit_insn (gen_lr (target, op0));
5358      return target;
5359
5360    case ARC_BUILTIN_SR:
5361      icode = CODE_FOR_sr;
5362      arg0 = CALL_EXPR_ARG (exp, 0);
5363      arg1 = CALL_EXPR_ARG (exp, 1);
5364
5365      fold (arg1);
5366
5367      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5368      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5369
5370      mode0 = insn_data[icode].operand[0].mode;
5371      mode1 = insn_data[icode].operand[1].mode;
5372
5373      emit_insn (gen_sr (op0, op1));
5374      return NULL_RTX;
5375
5376    case ARC_BUILTIN_TRAP_S:
5377      icode = CODE_FOR_trap_s;
5378      arg0 = CALL_EXPR_ARG (exp, 0);
5379
5380      fold (arg0);
5381
5382      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5383      mode0 = insn_data[icode].operand[1].mode;
5384
5385      /* We don't give an error for non-const values here because
5386	 we still want to allow things to be fixed up by later inlining /
5387	 constant folding / dead code elimination.  */
5388      if (CONST_INT_P (op0) && !satisfies_constraint_L (op0))
5389	{
5390	  /* Keep this message in sync with the one in arc.md:trap_s,
5391	     because *.md files don't get scanned by exgettext.  */
5392	  error ("operand to trap_s should be an unsigned 6-bit value");
5393	}
5394      emit_insn (gen_trap_s (op0));
5395      return NULL_RTX;
5396
5397    case ARC_BUILTIN_UNIMP_S:
5398      icode = CODE_FOR_unimp_s;
5399      emit_insn (gen_unimp_s (const1_rtx));
5400      return NULL_RTX;
5401
5402    case ARC_BUILTIN_ALIGNED:
5403      /* __builtin_arc_aligned (void* val, int alignval) */
5404      arg0 = CALL_EXPR_ARG (exp, 0);
5405      arg1 = CALL_EXPR_ARG (exp, 1);
5406      fold (arg1);
5407      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5408      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
5409      target = gen_reg_rtx (SImode);
5410
5411      if (!CONST_INT_P (op1))
5412	{
5413	  /* If we can't fold the alignment to a constant integer
5414	     whilst optimizing, this is probably a user error.  */
5415	  if (optimize)
5416	    warning (0, "__builtin_arc_aligned with non-constant alignment");
5417	}
5418      else
5419	{
5420	  HOST_WIDE_INT alignTest = INTVAL (op1);
5421	  /* Check alignTest is positive, and a power of two.  */
5422	  if (alignTest <= 0 || alignTest != (alignTest & -alignTest))
5423	    {
5424	      error ("invalid alignment value for __builtin_arc_aligned");
5425	      return NULL_RTX;
5426	    }
5427
5428	  if (CONST_INT_P (op0))
5429	    {
5430	      HOST_WIDE_INT pnt = INTVAL (op0);
5431
5432	      if ((pnt & (alignTest - 1)) == 0)
5433		return const1_rtx;
5434	    }
5435	  else
5436	    {
5437	      unsigned  align = get_pointer_alignment (arg0);
5438	      unsigned  numBits = alignTest * BITS_PER_UNIT;
5439
5440	      if (align && align >= numBits)
5441		return const1_rtx;
5442	      /* Another attempt to ascertain alignment.  Check the type
5443		 we are pointing to.  */
5444	      if (POINTER_TYPE_P (TREE_TYPE (arg0))
5445		  && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits)
5446		return const1_rtx;
5447	    }
5448	}
5449
5450      /* Default to false.  */
5451      return const0_rtx;
5452
5453    default:
5454      break;
5455    }
5456
5457  /* @@@ Should really do something sensible here.  */
5458  return NULL_RTX;
5459}
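
/* Illustrative use of the __builtin_arc_aligned expansion above: given
   "char buf[64] __attribute__ ((aligned (8)));", the call
   "__builtin_arc_aligned (buf, 8)" folds to 1 at compile time, while a
   non-constant alignment argument only provokes a warning when optimizing
   and the result defaults to 0.  */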
5460
5461/* Return true if operands[opno] is a valid compile-time constant to be
5462   used as a register number in the code for builtins.  Otherwise flag an
5463   error and return false.  */
5464
5465bool
5466check_if_valid_regno_const (rtx *operands, int opno)
5467{
5468
5469  switch (GET_CODE (operands[opno]))
5470    {
5471    case SYMBOL_REF :
5472    case CONST :
5473    case CONST_INT :
5474      return true;
5475    default:
5476	error ("register number must be a compile-time constant. Try giving higher optimization levels");
5477	break;
5478    }
5479  return false;
5480}
5481
5482/* Check whether, after all the constant folding, the operand to
5483   __builtin_arc_sleep is an unsigned 6-bit value.  If not, flag an error.  */
5484
5485bool
5486check_if_valid_sleep_operand (rtx *operands, int opno)
5487{
5488  switch (GET_CODE (operands[opno]))
5489    {
5490    case CONST :
5491    case CONST_INT :
5492	if (UNSIGNED_INT6 (INTVAL (operands[opno])))
5493	    return true;
5494    default:
5495	fatal_error (input_location,
5496		     "operand for sleep instruction must be an unsigned 6 bit compile-time constant");
5497	break;
5498    }
5499  return false;
5500}
5501
5502/* Return true if it is ok to make a tail-call to DECL.  */
5503
5504static bool
5505arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5506			     tree exp ATTRIBUTE_UNUSED)
5507{
5508  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
5509  if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
5510    return false;
5511
5512  /* Everything else is ok.  */
5513  return true;
5514}
5515
5516/* Output code to add DELTA to the first argument, and then jump
5517   to FUNCTION.  Used for C++ multiple inheritance.  */
5518
5519static void
5520arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5521		     HOST_WIDE_INT delta,
5522		     HOST_WIDE_INT vcall_offset,
5523		     tree function)
5524{
5525  int mi_delta = delta;
5526  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
5527  int shift = 0;
5528  int this_regno
5529    = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
5530  rtx fnaddr;
5531
5532  if (mi_delta < 0)
5533    mi_delta = - mi_delta;
5534
5535  /* Add DELTA to THIS.  Emit it eight bits at a time, as a series of
5536     add (or, for a negative delta, sub) instructions.  */
5537
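  /* For example (illustrative), a hypothetical delta of 0x12345 is emitted
     as three instructions, "add this,this,0x45", "add this,this,0x2300" and
     "add this,this,0x10000": each iteration below peels off up to eight
     bits at a suitable shift.  */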
5538  while (mi_delta != 0)
5539    {
5540      if ((mi_delta & (3 << shift)) == 0)
5541	shift += 2;
5542      else
5543	{
5544	  asm_fprintf (file, "\t%s\t%s, %s, %d\n",
5545		       mi_op, reg_names[this_regno], reg_names[this_regno],
5546		       mi_delta & (0xff << shift));
5547	  mi_delta &= ~(0xff << shift);
5548	  shift += 8;
5549	}
5550    }
5551
5552  /* If needed, add *(*THIS + VCALL_OFFSET) to THIS.  */
5553  if (vcall_offset != 0)
5554    {
5555      /* ld  r12,[this]           --> temp = *this
5556	 add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
5557	 ld r12,[r12]
5558	 add this,this,r12        --> this += *(*this + vcall_offset) */
5559      asm_fprintf (file, "\tld\t%s, [%s]\n",
5560		   ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
5561      asm_fprintf (file, "\tadd\t%s, %s, " HOST_WIDE_INT_PRINT_DEC "\n",
5562		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
5563      asm_fprintf (file, "\tld\t%s, [%s]\n",
5564		   ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
5565      asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
5566		   reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
5567    }
5568
5569  fnaddr = XEXP (DECL_RTL (function), 0);
5570
5571  if (arc_is_longcall_p (fnaddr))
5572    fputs ("\tj\t", file);
5573  else
5574    fputs ("\tb\t", file);
5575  assemble_name (file, XSTR (fnaddr, 0));
5576  fputc ('\n', file);
5577}
5578
5579/* Return true if a 32 bit "long_call" should be generated for
5580   this calling SYM_REF.  We generate a long_call if the function:
5581
5582        a.  has an __attribute__ ((long_call))
5583     or b.  the -mlong-calls command line switch has been specified
5584
5585   However we do not generate a long call if the function has an
5586   __attribute__ ((short_call)) or __attribute__ ((medium_call))
5587
5588   This function will be called by C fragments contained in the machine
5589   description file.  */
5590
5591bool
5592arc_is_longcall_p (rtx sym_ref)
5593{
5594  if (GET_CODE (sym_ref) != SYMBOL_REF)
5595    return false;
5596
5597  return (SYMBOL_REF_LONG_CALL_P (sym_ref)
5598	  || (TARGET_LONG_CALLS_SET
5599	      && !SYMBOL_REF_SHORT_CALL_P (sym_ref)
5600	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5601
5602}
5603
5604/* Likewise for short calls.  */
5605
5606bool
5607arc_is_shortcall_p (rtx sym_ref)
5608{
5609  if (GET_CODE (sym_ref) != SYMBOL_REF)
5610    return false;
5611
5612  return (SYMBOL_REF_SHORT_CALL_P (sym_ref)
5613	  || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS
5614	      && !SYMBOL_REF_LONG_CALL_P (sym_ref)
5615	      && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref)));
5616
5617}
5618
5619/* Emit profiling code for calling CALLEE.  Return true if a special
5620   call pattern needs to be generated.  */
5621
5622bool
5623arc_profile_call (rtx callee)
5624{
5625  rtx from = XEXP (DECL_RTL (current_function_decl), 0);
5626
5627  if (TARGET_UCB_MCOUNT)
5628    /* Profiling is done by instrumenting the callee.  */
5629    return false;
5630
5631  if (CONSTANT_P (callee))
5632    {
5633      rtx count_ptr
5634	= gen_rtx_CONST (Pmode,
5635			 gen_rtx_UNSPEC (Pmode,
5636					 gen_rtvec (3, from, callee,
5637						    CONST0_RTX (Pmode)),
5638					 UNSPEC_PROF));
5639      rtx counter = gen_rtx_MEM (SImode, count_ptr);
5640      /* ??? The increment would better be done atomically, but as there is
5641	 no proper hardware support, that would be too expensive.  */
5642      emit_move_insn (counter,
5643		      force_reg (SImode, plus_constant (SImode, counter, 1)));
5644      return false;
5645    }
5646  else
5647    {
5648      rtx count_list_ptr
5649	= gen_rtx_CONST (Pmode,
5650			 gen_rtx_UNSPEC (Pmode,
5651					 gen_rtvec (3, from, CONST0_RTX (Pmode),
5652						    CONST0_RTX (Pmode)),
5653					 UNSPEC_PROF));
5654      emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
5655      emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
5656      return true;
5657    }
5658}
5659
5660/* Worker function for TARGET_RETURN_IN_MEMORY.  */
5661
5662static bool
5663arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5664{
5665  if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
5666    return true;
5667  else
5668    {
5669      HOST_WIDE_INT size = int_size_in_bytes (type);
5670      return (size == -1 || size > 8);
5671    }
5672}
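
/* Thus, for example, any struct or union result, however small, is returned
   in memory, while a scalar of at most 8 bytes such as "long long" comes
   back in registers.  */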
5673
5674
5675/* This was in rtlanal.c, and can go in there when we decide we want
5676   to submit the change for inclusion in the GCC tree.  */
5677/* Like note_stores, but allow the callback to have side effects on the rtl
5678   (like the note_stores of yore):
5679   Call FUN on each register or MEM that is stored into or clobbered by X.
5680   (X would be the pattern of an insn).  DATA is an arbitrary pointer,
5681   ignored by note_stores, but passed to FUN.
5682   FUN may alter parts of the RTL.
5683
5684   FUN receives three arguments:
5685   1. the REG, MEM, CC0 or PC being stored in or clobbered,
5686   2. the SET or CLOBBER rtx that does the store,
5687   3. the pointer DATA provided to note_stores.
5688
5689  If the item being stored in or clobbered is a SUBREG of a hard register,
5690  the SUBREG will be passed.  */
5691
5692/* For now.  */ static
5693void
5694walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
5695{
5696  int i;
5697
5698  if (GET_CODE (x) == COND_EXEC)
5699    x = COND_EXEC_CODE (x);
5700
5701  if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
5702    {
5703      rtx dest = SET_DEST (x);
5704
5705      while ((GET_CODE (dest) == SUBREG
5706	      && (!REG_P (SUBREG_REG (dest))
5707		  || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
5708	     || GET_CODE (dest) == ZERO_EXTRACT
5709	     || GET_CODE (dest) == STRICT_LOW_PART)
5710	dest = XEXP (dest, 0);
5711
5712      /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
5713	 each of whose first operand is a register.  */
5714      if (GET_CODE (dest) == PARALLEL)
5715	{
5716	  for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
5717	    if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
5718	      (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
5719	}
5720      else
5721	(*fun) (dest, x, data);
5722    }
5723
5724  else if (GET_CODE (x) == PARALLEL)
5725    for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
5726      walk_stores (XVECEXP (x, 0, i), fun, data);
5727}
5728
5729static bool
5730arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED,
5731		       machine_mode mode ATTRIBUTE_UNUSED,
5732		       const_tree type,
5733		       bool named ATTRIBUTE_UNUSED)
5734{
5735  return (type != 0
5736	  && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5737	      || TREE_ADDRESSABLE (type)));
5738}
5739
5740/* Implement TARGET_CAN_USE_DOLOOP_P.  */
5741
5742static bool
5743arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
5744		      unsigned int loop_depth, bool entered_at_top)
5745{
5746  if (loop_depth > 1)
5747    return false;
5748  /* Setting up the loop with two sr instructions costs 6 cycles.  */
5749  if (TARGET_ARC700
5750      && !entered_at_top
5751      && wi::gtu_p (iterations, 0)
5752      && wi::leu_p (iterations, flag_pic ? 6 : 3))
5753    return false;
5754  return true;
5755}
5756
5757/* Return NULL if INSN is valid within a low-overhead loop.
5758   Otherwise return a string explaining why doloop cannot be applied.  */
5759
5760static const char *
5761arc_invalid_within_doloop (const rtx_insn *insn)
5762{
5763  if (CALL_P (insn))
5764    return "Function call in the loop.";
5765  return NULL;
5766}
5767
5768static int arc_reorg_in_progress = 0;
5769
5770/* ARC's machine-specific reorg function.  */
5771
5772static void
5773arc_reorg (void)
5774{
5775  rtx_insn *insn;
5776  rtx pattern;
5777  rtx pc_target;
5778  long offset;
5779  int changed;
5780
5781  cfun->machine->arc_reorg_started = 1;
5782  arc_reorg_in_progress = 1;
5783
5784  /* Emit special sections for profiling.  */
5785  if (crtl->profile)
5786    {
5787      section *save_text_section;
5788      rtx_insn *insn;
5789      int size = get_max_uid () >> 4;
5790      htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
5791				 NULL);
5792
5793      save_text_section = in_section;
5794      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5795	if (NONJUMP_INSN_P (insn))
5796	  walk_stores (PATTERN (insn), write_profile_sections, htab);
5797      if (htab_elements (htab))
5798	in_section = 0;
5799      switch_to_section (save_text_section);
5800      htab_delete (htab);
5801    }
5802
5803  /* Link up loop ends with their loop start.  */
5804  {
5805    for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5806      if (GET_CODE (insn) == JUMP_INSN
5807	  && recog_memoized (insn) == CODE_FOR_doloop_end_i)
5808	{
5809	  rtx_insn *top_label
5810	    = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
5811	  rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
5812	  rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
5813	  rtx_insn *lp_simple = NULL;
5814	  rtx_insn *next = NULL;
5815	  rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
5816	  HOST_WIDE_INT loop_end_id
5817	    = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
5818	  int seen_label = 0;
5819
5820	  for (lp = prev;
5821	       (lp && NONJUMP_INSN_P (lp)
5822		&& recog_memoized (lp) != CODE_FOR_doloop_begin_i);
5823	       lp = prev_nonnote_insn (lp))
5824	    ;
5825	  if (!lp || !NONJUMP_INSN_P (lp)
5826	      || dead_or_set_regno_p (lp, LP_COUNT))
5827	    {
5828	      for (prev = next = insn, lp = NULL ; prev || next;)
5829		{
5830		  if (prev)
5831		    {
5832		      if (NONJUMP_INSN_P (prev)
5833			  && recog_memoized (prev) == CODE_FOR_doloop_begin_i
5834			  && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
5835			      == loop_end_id))
5836			{
5837			  lp = prev;
5838			  break;
5839			}
5840		      else if (LABEL_P (prev))
5841			seen_label = 1;
5842		      prev = prev_nonnote_insn (prev);
5843		    }
5844		  if (next)
5845		    {
5846		      if (NONJUMP_INSN_P (next)
5847			  && recog_memoized (next) == CODE_FOR_doloop_begin_i
5848			  && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
5849			      == loop_end_id))
5850			{
5851			  lp = next;
5852			  break;
5853			}
5854		      next = next_nonnote_insn (next);
5855		    }
5856		}
5857	      prev = NULL;
5858	    }
5859	  else
5860	    lp_simple = lp;
5861	  if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
5862	    {
5863	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0, 3), 0);
5864	      if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
5865		/* The loop end insn has been duplicated.  That can happen
5866		   when there is a conditional block at the very end of
5867		   the loop.  */
5868		goto failure;
5869	      /* If register allocation failed to allocate to the right
5870		 register, there is no point in teaching reload to
5871		 fix this up with reloads, as that would cost more
5872		 than using an ordinary core register with the
5873		 doloop_fallback pattern.  */
5874	      if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
5875	      /* Likewise, if the loop setup is evidently inside the loop,
5876		 we lose.  */
5877		  || (!lp_simple && lp != next && !seen_label))
5878		{
5879		  remove_insn (lp);
5880		  goto failure;
5881		}
5882	      /* It is common that the optimizers copy the loop count from
5883		 another register, and doloop_begin_i is stuck with the
5884		 source of the move.  Making doloop_begin_i only accept "l"
5885		 is nonsensical, as this then makes reload evict the pseudo
5886		 used for the loop end.  The underlying cause is that the
5887		 optimizers don't understand that the register allocation for
5888		 doloop_begin_i should be treated as part of the loop.
5889		 Try to work around this problem by verifying the previous
5890		 move exists.  */
5891	      if (true_regnum (begin_cnt) != LP_COUNT)
5892		{
5893		  rtx_insn *mov;
5894		  rtx set, note;
5895
5896		  for (mov = prev_nonnote_insn (lp); mov;
5897		       mov = prev_nonnote_insn (mov))
5898		    {
5899		      if (!NONJUMP_INSN_P (mov))
5900			mov = 0;
5901		      else if ((set = single_set (mov))
5902			  && rtx_equal_p (SET_SRC (set), begin_cnt)
5903			  && rtx_equal_p (SET_DEST (set), op0))
5904			break;
5905		    }
5906		  if (mov)
5907		    {
5908		      XEXP (XVECEXP (PATTERN (lp), 0, 3), 0) = op0;
5909		      note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
5910		      if (note)
5911			remove_note (lp, note);
5912		    }
5913		  else
5914		    {
5915		      remove_insn (lp);
5916		      goto failure;
5917		    }
5918		}
5919	      XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
5920	      XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
5921	      if (next == lp)
5922		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
5923	      else if (!lp_simple)
5924		XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
5925	      else if (prev != lp)
5926		{
5927		  remove_insn (lp);
5928		  add_insn_after (lp, prev, NULL);
5929		}
5930	      if (!lp_simple)
5931		{
5932		  XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
5933		    = gen_rtx_LABEL_REF (Pmode, top_label);
5934		  add_reg_note (lp, REG_LABEL_OPERAND, top_label);
5935		  LABEL_NUSES (top_label)++;
5936		}
5937	      /* We can avoid tedious loop start / end setting for empty loops
5938		 by merely setting the loop count to its final value.  */
5939	      if (next_active_insn (top_label) == insn)
5940		{
5941		  rtx lc_set
5942		    = gen_rtx_SET (VOIDmode,
5943				   XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
5944				   const0_rtx);
5945
5946		  rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
5947		  delete_insn (lp);
5948		  delete_insn (insn);
5949		  insn = lc_set_insn;
5950		}
5951	      /* If the loop is non-empty with zero length, we can't make it
5952		 a zero-overhead loop.  That can happen for empty asms.  */
5953	      else
5954		{
5955		  rtx_insn *scan;
5956
5957		  for (scan = top_label;
5958		       (scan && scan != insn
5959			&& (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
5960		       scan = NEXT_INSN (scan));
5961		  if (scan == insn)
5962		    {
5963		      remove_insn (lp);
5964		      goto failure;
5965		    }
5966		}
5967	    }
5968	  else
5969	    {
5970	      /* Sometimes the loop optimizer makes a complete hash of the
5971		 loop.  If it were only that the loop is not entered at the
5972		 top, we could fix this up by setting LP_START with SR.
5973		 However, if we can't find the loop begin where it should be,
5974		 chances are that it does not even dominate the loop, but is
5975		 inside the loop instead.  Using SR there would kill
5976		 performance.
5977		 We use the doloop_fallback pattern here, which executes
5978		 in two cycles on the ARC700 when predicted correctly.  */
5979	    failure:
5980	      if (!REG_P (op0))
5981		{
5982		  rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
5983
5984		  emit_insn_before (gen_move_insn (op3, op0), insn);
5985		  PATTERN (insn)
5986		    = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
5987		}
5988	      else
5989		XVEC (PATTERN (insn), 0)
5990		  = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
5991			       XVECEXP (PATTERN (insn), 0, 1));
5992	      INSN_CODE (insn) = -1;
5993	    }
5994	}
5995    }
5996
5997/* FIXME: should anticipate ccfsm action, generate special patterns for
5998   to-be-deleted branches that have no delay slot and have at least the
5999   length of the size increase forced on other insns that are conditionalized.
6000   This can also have an insn_list inside that enumerates insns which are
6001   not actually conditionalized because the destinations are dead in the
6002   not-execute case.
6003   Could also tag branches that we want to be unaligned if they get no delay
6004   slot, or even ones that we don't want to do delay slot scheduling for
6005   because we can unalign them.
6006
6007   However, there are cases when conditional execution is only possible after
6008   delay slot scheduling:
6009
6010   - If a delay slot is filled with a nocond/set insn from above, the previous
6011     basic block can become eligible for conditional execution.
6012   - If a delay slot is filled with a nocond insn from the fall-through path,
6013     the branch with that delay slot can become eligible for conditional
6014     execution (however, with the same sort of data flow analysis that dbr
6015     does, we could have figured out before that we don't need to
6016     conditionalize this insn.)
6017   - If a delay slot insn is filled with an insn from the target, the
6018     target label gets its uses decremented (even deleted if falling to zero),
6019     thus possibly creating more condexec opportunities there.
6020   Therefore, we should still be prepared to apply condexec optimization on
6021   non-prepared branches if the size increase of conditionalized insns is no
6022   more than the size saved from eliminating the branch.  An invocation option
6023   could also be used to reserve a bit of extra size for condbranches so that
6024   this'll work more often (could also test in arc_reorg if the block is
6025   'close enough' to be eligible for condexec to make this likely, and
6026   estimate required size increase).  */
6027  /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible.  */
6028  if (TARGET_NO_BRCC_SET)
6029    return;
6030
6031  do
6032    {
6033      init_insn_lengths ();
6034      changed = 0;
6035
6036      if (optimize > 1 && !TARGET_NO_COND_EXEC)
6037	{
6038	  arc_ifcvt ();
6039	  unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish;
6040	  df_finish_pass ((flags & TODO_df_verify) != 0);
6041	}
6042
6043      /* Call shorten_branches to calculate the insn lengths.  */
6044      shorten_branches (get_insns ());
6045      cfun->machine->ccfsm_current_insn = NULL_RTX;
6046
6047      if (!INSN_ADDRESSES_SET_P ())
6048	fatal_error (input_location, "insn addresses not set after shorten_branches");
6049
6050      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6051	{
6052	  rtx label;
6053	  enum attr_type insn_type;
6054
6055	  /* If a non-jump insn (or a casesi jump table), continue.  */
6056	  if (GET_CODE (insn) != JUMP_INSN
6057	      || GET_CODE (PATTERN (insn)) == ADDR_VEC
6058	      || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
6059	    continue;
6060
6061	  /* If we already have a brcc, note if it is suitable for brcc_s.
6062	     Be a bit generous with the brcc_s range so that we can take
6063	     advantage of any code shortening from delay slot scheduling.  */
6064	  if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
6065	    {
6066	      rtx pat = PATTERN (insn);
6067	      rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
6068	      rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
6069
6070	      offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6071	      if ((offset >= -140 && offset < 140)
6072		  && rtx_equal_p (XEXP (op, 1), const0_rtx)
6073		  && compact_register_operand (XEXP (op, 0), VOIDmode)
6074		  && equality_comparison_operator (op, VOIDmode))
6075		PUT_MODE (*ccp, CC_Zmode);
6076	      else if (GET_MODE (*ccp) == CC_Zmode)
6077		PUT_MODE (*ccp, CC_ZNmode);
6078	      continue;
6079	    }
6080	  if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
6081	      || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
6082	    continue;
6083
6084	  /* OK. so we have a jump insn.  */
6085	  /* We need to check that it is a bcc.  */
6086	  /* Bcc => set (pc) (if_then_else ) */
6087	  pattern = PATTERN (insn);
6088	  if (GET_CODE (pattern) != SET
6089	      || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
6090	      || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1)))
6091	    continue;
6092
6093	  /* Now check if the jump is beyond the s9 range.  */
6094	  if (CROSSING_JUMP_P (insn))
6095	    continue;
6096	  offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
6097
6098	  if (offset > 253 || offset < -254)
6099	    continue;
6100
6101	  pc_target = SET_SRC (pattern);
6102
6103	  /* Now go back and search for the set cc insn.  */
6104
6105	  label = XEXP (pc_target, 1);
6106
6107	    {
6108	      rtx pat;
6109	      rtx_insn *scan, *link_insn = NULL;
6110
6111	      for (scan = PREV_INSN (insn);
6112		   scan && GET_CODE (scan) != CODE_LABEL;
6113		   scan = PREV_INSN (scan))
6114		{
6115		  if (! INSN_P (scan))
6116		    continue;
6117		  pat = PATTERN (scan);
6118		  if (GET_CODE (pat) == SET
6119		      && cc_register (SET_DEST (pat), VOIDmode))
6120		    {
6121		      link_insn = scan;
6122		      break;
6123		    }
6124		}
6125	      if (! link_insn)
6126		continue;
6127	      else
6128		/* Check if this is a data dependency.  */
6129		{
6130		  rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
6131		  rtx cmp0, cmp1;
6132
6133		  /* OK, this is the set cc.  Copy args here.  */
6134		  op = XEXP (pc_target, 0);
6135
6136		  op0 = cmp0 = XEXP (SET_SRC (pat), 0);
6137		  op1 = cmp1 = XEXP (SET_SRC (pat), 1);
6138		  if (GET_CODE (op0) == ZERO_EXTRACT
6139		      && XEXP (op0, 1) == const1_rtx
6140		      && (GET_CODE (op) == EQ
6141			  || GET_CODE (op) == NE))
6142		    {
6143		      /* btst / b{eq,ne} -> bbit{0,1} */
6144		      op0 = XEXP (cmp0, 0);
6145		      op1 = XEXP (cmp0, 2);
6146		    }
6147		  else if (!register_operand (op0, VOIDmode)
6148			  || !general_operand (op1, VOIDmode))
6149		    continue;
6150		  /* Be careful not to break what cmpsfpx_raw is
6151		     trying to create for checking equality of
6152		     single-precision floats.  */
6153		  else if (TARGET_SPFP
6154			   && GET_MODE (op0) == SFmode
6155			   && GET_MODE (op1) == SFmode)
6156		    continue;
6157
6158		  /* None of the two cmp operands should be set between the
6159		     cmp and the branch.  */
6160		  if (reg_set_between_p (op0, link_insn, insn))
6161		    continue;
6162
6163		  if (reg_set_between_p (op1, link_insn, insn))
6164		    continue;
6165
6166		  /* Since the MODE check does not work, check that this is
6167		     CC reg's last set location before insn, and also no
6168		     instruction between the cmp and branch uses the
6169		     condition codes.  */
6170		  if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
6171		      || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
6172		    continue;
6173
6174		  /* CC reg should be dead after insn.  */
6175		  if (!find_regno_note (insn, REG_DEAD, CC_REG))
6176		    continue;
6177
6178		  op = gen_rtx_fmt_ee (GET_CODE (op),
6179				       GET_MODE (op), cmp0, cmp1);
6180		  /* If we create a LIMM where there was none before,
6181		     we only benefit if we can avoid a scheduling bubble
6182		     for the ARC600.  Otherwise, we'd only forgo chances
6183		     at short insn generation, and risk out-of-range
6184		     branches.  */
6185		  if (!brcc_nolimm_operator (op, VOIDmode)
6186		      && !long_immediate_operand (op1, VOIDmode)
6187		      && (TARGET_ARC700
6188			  || next_active_insn (link_insn) != insn))
6189		    continue;
6190
6191		  /* Emit bbit / brcc (or brcc_s if possible).
6192		     CC_Zmode indicates that brcc_s is possible.  */
6193
6194		  if (op0 != cmp0)
6195		    cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
6196		  else if ((offset >= -140 && offset < 140)
6197			   && rtx_equal_p (op1, const0_rtx)
6198			   && compact_register_operand (op0, VOIDmode)
6199			   && (GET_CODE (op) == EQ
6200			       || GET_CODE (op) == NE))
6201		    cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
6202		  else
6203		    cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
6204
6205		  brcc_insn
6206		    = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx);
6207		  brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn);
6208		  cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
6209		  brcc_insn
6210		    = gen_rtx_PARALLEL
6211			(VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
6212		  brcc_insn = emit_jump_insn_before (brcc_insn, insn);
6213
6214		  JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
6215		  note = find_reg_note (insn, REG_BR_PROB, 0);
6216		  if (note)
6217		    {
6218		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6219		      REG_NOTES (brcc_insn) = note;
6220		    }
6221		  note = find_reg_note (link_insn, REG_DEAD, op0);
6222		  if (note)
6223		    {
6224		      remove_note (link_insn, note);
6225		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6226		      REG_NOTES (brcc_insn) = note;
6227		    }
6228		  note = find_reg_note (link_insn, REG_DEAD, op1);
6229		  if (note)
6230		    {
6231		      XEXP (note, 1) = REG_NOTES (brcc_insn);
6232		      REG_NOTES (brcc_insn) = note;
6233		    }
6234
6235		  changed = 1;
6236
6237		  /* Delete the bcc insn.  */
6238		  set_insn_deleted (insn);
6239
6240		  /* Delete the cmp insn.  */
6241		  set_insn_deleted (link_insn);
6242
6243		}
6244	    }
6245	}
6246      /* Clear out insn_addresses.  */
6247      INSN_ADDRESSES_FREE ();
6248
6249    } while (changed);
6250
6251  if (INSN_ADDRESSES_SET_P ())
6252    fatal_error (input_location, "insn addresses not freed");
6253
6254  arc_reorg_in_progress = 0;
6255}
6256
6257/* Check if the operands are valid for BRcc.d generation.
6258   Valid BRcc.d patterns are
6259       BRcc.d b, c, s9
6260       BRcc.d b, u6, s9
6261
6262   For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
6263   since they are encoded by the assembler as {GE, LT, HS, LS} 64, which
6264   does not have a delay slot.
6265
6266   Assumed precondition: the second operand is either a register or a u6 value.  */
6267
6268bool
6269valid_brcc_with_delay_p (rtx *operands)
6270{
6271  if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
6272    return false;
6273  return brcc_nolimm_operator (operands[0], VOIDmode);
6274}
6275
6276/* ??? Hack.  This should not really be here.  See PR32143.  */
6277static bool
6278arc_decl_anon_ns_mem_p (const_tree decl)
6279{
6280  while (1)
6281    {
6282      if (decl == NULL_TREE || decl == error_mark_node)
6283	return false;
6284      if (TREE_CODE (decl) == NAMESPACE_DECL
6285	  && DECL_NAME (decl) == NULL_TREE)
6286	return true;
6287      /* Classes and namespaces inside anonymous namespaces have
6288	 TREE_PUBLIC == 0, so we can shortcut the search.  */
6289      else if (TYPE_P (decl))
6290	return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
6291      else if (TREE_CODE (decl) == NAMESPACE_DECL)
6292	return (TREE_PUBLIC (decl) == 0);
6293      else
6294	decl = DECL_CONTEXT (decl);
6295    }
6296}
6297
6298/* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
6299   access DECL using %gp_rel(...)($gp).  */
6300
6301static bool
6302arc_in_small_data_p (const_tree decl)
6303{
6304  HOST_WIDE_INT size;
6305
6306  if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
6307    return false;
6308
6309
6310  /* We don't yet generate small-data references for -mabicalls.  See related
6311     -G handling in override_options.  */
6312  if (TARGET_NO_SDATA_SET)
6313    return false;
6314
6315  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
6316    {
6317      const char *name;
6318
6319      /* Reject anything that isn't in a known small-data section.  */
6320      name = DECL_SECTION_NAME (decl);
6321      if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
6322	return false;
6323
6324      /* If a symbol is defined externally, the assembler will use the
6325	 usual -G rules when deciding how to implement macros.  */
6326      if (!DECL_EXTERNAL (decl))
6327	  return true;
6328    }
6329  /* Only global variables go into sdata section for now.  */
6330  else if (1)
6331    {
6332      /* Don't put constants into the small data section: we want them
6333	 to be in ROM rather than RAM.  */
6334      if (TREE_CODE (decl) != VAR_DECL)
6335	return false;
6336
6337      if (TREE_READONLY (decl)
6338	  && !TREE_SIDE_EFFECTS (decl)
6339	  && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
6340	return false;
6341
6342      /* TREE_PUBLIC might change after the first call, because of the patch
6343	 for PR19238.  */
6344      if (default_binds_local_p_1 (decl, 1)
6345	  || arc_decl_anon_ns_mem_p (decl))
6346	return false;
6347
6348      /* To ensure that -mvolatile-cache works, exclude volatile variables:
6349	 ld.di does not have a gp-relative variant.  */
6350      if (TREE_THIS_VOLATILE (decl))
6351	return false;
6352    }
6353
6354  /* Disable sdata references to weak variables.  */
6355  if (DECL_WEAK (decl))
6356    return false;
6357
6358  size = int_size_in_bytes (TREE_TYPE (decl));
6359
6360/*   if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
6361/*     return false; */
6362
6363  /* Allow only <=4B long data types into sdata.  */
6364  return (size > 0 && size <= 4);
6365}
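
/* For example (illustrative), a global "int counter;" (4 bytes, non-weak,
   non-volatile, not placed in a named section) qualifies for small data,
   whereas an 8-byte "long long", a weak variable, a volatile one, or a
   file-scope static does not.  */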
6366
6367/* Return true if X is a small data address that can be rewritten
6368   as a gp+symref.  */
6369
6370static bool
6371arc_rewrite_small_data_p (const_rtx x)
6372{
6373  if (GET_CODE (x) == CONST)
6374    x = XEXP (x, 0);
6375
6376  if (GET_CODE (x) == PLUS)
6377    {
6378      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6379	x = XEXP (x, 0);
6380    }
6381
6382  return (GET_CODE (x) ==  SYMBOL_REF
6383	  && SYMBOL_REF_SMALL_P(x));
6384}
6385
6386/* If possible, rewrite OP so that it refers to small data using
6387   explicit relocations.  */
6388
6389rtx
6390arc_rewrite_small_data (rtx op)
6391{
6392  op = copy_insn (op);
6393  subrtx_ptr_iterator::array_type array;
6394  FOR_EACH_SUBRTX_PTR (iter, array, &op, ALL)
6395    {
6396      rtx *loc = *iter;
6397      if (arc_rewrite_small_data_p (*loc))
6398	{
6399	  gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM);
6400	  *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
6401	  if (loc != &op)
6402	    {
6403	      if (GET_CODE (op) == MEM && &XEXP (op, 0) == loc)
6404		; /* OK.  */
6405	      else if (GET_CODE (op) == MEM
6406		       && GET_CODE (XEXP (op, 0)) == PLUS
6407		       && GET_CODE (XEXP (XEXP (op, 0), 0)) == MULT)
6408		*loc = force_reg (Pmode, *loc);
6409	      else
6410		gcc_unreachable ();
6411	    }
6412	  iter.skip_subrtxes ();
6413	}
6414      else if (GET_CODE (*loc) == PLUS
6415	       && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
6416	iter.skip_subrtxes ();
6417    }
6418  return op;
6419}
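
/* E.g. (illustrative): an address of the form (symbol_ref "foo") whose
   small-data flag is set is rewritten as
   (plus (reg <gp>) (symbol_ref "foo")), which can later be emitted as a
   gp-relative reference.  */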
6420
6421/* Return true if OP refers to small data symbols directly, not through
6422   a PLUS.  */
6423
6424bool
6425small_data_pattern (rtx op, machine_mode)
6426{
6427  if (GET_CODE (op) == SEQUENCE)
6428    return false;
6429  subrtx_iterator::array_type array;
6430  FOR_EACH_SUBRTX (iter, array, op, ALL)
6431    {
6432      const_rtx x = *iter;
6433      if (GET_CODE (x) == PLUS
6434	  && rtx_equal_p (XEXP (x, 0), pic_offset_table_rtx))
6435	iter.skip_subrtxes ();
6436      else if (arc_rewrite_small_data_p (x))
6437	return true;
6438    }
6439  return false;
6440}
6441
6442/* Return true if OP is an acceptable memory operand for ARCompact
6443   16-bit gp-relative load instructions.
6444   OP should look like: [r26, symref@sda],
6445   i.e. (mem (plus (reg 26) (symref with small-data flag set))).
6446  */
6447/* volatile cache option still to be handled.  */
6448
6449bool
6450compact_sda_memory_operand (rtx op, machine_mode mode)
6451{
6452  rtx addr;
6453  int size;
6454
6455  /* Eliminate non-memory operations.  */
6456  if (GET_CODE (op) != MEM)
6457    return false;
6458
6459  if (mode == VOIDmode)
6460    mode = GET_MODE (op);
6461
6462  size = GET_MODE_SIZE (mode);
6463
6464  /* dword operations really put out 2 instructions, so eliminate them.  */
6465  if (size > UNITS_PER_WORD)
6466    return false;
6467
6468  /* Decode the address now.  */
6469  addr = XEXP (op, 0);
6470
6471  return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr);
6472}
6473
6474/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL.  */
6475
6476void
6477arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
6478				   unsigned HOST_WIDE_INT size,
6479				   unsigned HOST_WIDE_INT align,
6480				   unsigned HOST_WIDE_INT globalize_p)
6481{
6482  int in_small_data = arc_in_small_data_p (decl);
6483
6484  if (in_small_data)
6485    switch_to_section (get_named_section (NULL, ".sbss", 0));
6486  /*    named_section (0,".sbss",0); */
6487  else
6488    switch_to_section (bss_section);
6489
6490  if (globalize_p)
6491    (*targetm.asm_out.globalize_label) (stream, name);
6492
6493  ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
6494  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
6495  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
6496  ASM_OUTPUT_LABEL (stream, name);
6497
6498  if (size != 0)
6499    ASM_OUTPUT_SKIP (stream, size);
6500}
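
/* Illustrative output for a 4-byte local object "x" with 4-byte alignment
   (the exact directives depend on the configured assembler):

	.section .sbss
	.align	2
	.type	x, @object
	.size	x, 4
   x:
	.zero	4
*/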
6501
6535/* SIMD builtins support.  */
6536enum simd_insn_args_type {
6537  Va_Vb_Vc,
6538  Va_Vb_rlimm,
6539  Va_Vb_Ic,
6540  Va_Vb_u6,
6541  Va_Vb_u8,
6542  Va_rlimm_u8,
6543
6544  Va_Vb,
6545
6546  void_rlimm,
6547  void_u6,
6548
6549  Da_u3_rlimm,
6550  Da_rlimm_rlimm,
6551
6552  Va_Ib_u8,
6553  void_Va_Ib_u8,
6554
6555  Va_Vb_Ic_u8,
6556  void_Va_u3_Ib_u8
6557};
6558
6559struct builtin_description
6560{
6561  enum simd_insn_args_type args_type;
6562  const enum insn_code     icode;
6563  const char * const       name;
6564  const enum arc_builtins  code;
6565};
6566
6567static const struct builtin_description arc_simd_builtin_desc_list[] =
6568{
6569  /* VVV builtins go first.  */
6570#define SIMD_BUILTIN(type, code, string, builtin) \
6571  { type, CODE_FOR_##code, "__builtin_arc_" string, \
6572    ARC_SIMD_BUILTIN_##builtin },
6573
6574  SIMD_BUILTIN (Va_Vb_Vc,    vaddaw_insn,   "vaddaw",     VADDAW)
6575  SIMD_BUILTIN (Va_Vb_Vc,     vaddw_insn,    "vaddw",      VADDW)
6576  SIMD_BUILTIN (Va_Vb_Vc,      vavb_insn,     "vavb",       VAVB)
6577  SIMD_BUILTIN (Va_Vb_Vc,     vavrb_insn,    "vavrb",      VAVRB)
6578  SIMD_BUILTIN (Va_Vb_Vc,    vdifaw_insn,   "vdifaw",     VDIFAW)
6579  SIMD_BUILTIN (Va_Vb_Vc,     vdifw_insn,    "vdifw",      VDIFW)
6580  SIMD_BUILTIN (Va_Vb_Vc,    vmaxaw_insn,   "vmaxaw",     VMAXAW)
6581  SIMD_BUILTIN (Va_Vb_Vc,     vmaxw_insn,    "vmaxw",      VMAXW)
6582  SIMD_BUILTIN (Va_Vb_Vc,    vminaw_insn,   "vminaw",     VMINAW)
6583  SIMD_BUILTIN (Va_Vb_Vc,     vminw_insn,    "vminw",      VMINW)
6584  SIMD_BUILTIN (Va_Vb_Vc,    vmulaw_insn,   "vmulaw",     VMULAW)
6585  SIMD_BUILTIN (Va_Vb_Vc,   vmulfaw_insn,  "vmulfaw",    VMULFAW)
6586  SIMD_BUILTIN (Va_Vb_Vc,    vmulfw_insn,   "vmulfw",     VMULFW)
6587  SIMD_BUILTIN (Va_Vb_Vc,     vmulw_insn,    "vmulw",      VMULW)
6588  SIMD_BUILTIN (Va_Vb_Vc,    vsubaw_insn,   "vsubaw",     VSUBAW)
6589  SIMD_BUILTIN (Va_Vb_Vc,     vsubw_insn,    "vsubw",      VSUBW)
6590  SIMD_BUILTIN (Va_Vb_Vc,    vsummw_insn,   "vsummw",     VSUMMW)
6591  SIMD_BUILTIN (Va_Vb_Vc,      vand_insn,     "vand",       VAND)
6592  SIMD_BUILTIN (Va_Vb_Vc,    vandaw_insn,   "vandaw",     VANDAW)
6593  SIMD_BUILTIN (Va_Vb_Vc,      vbic_insn,     "vbic",       VBIC)
6594  SIMD_BUILTIN (Va_Vb_Vc,    vbicaw_insn,   "vbicaw",     VBICAW)
6595  SIMD_BUILTIN (Va_Vb_Vc,       vor_insn,      "vor",        VOR)
6596  SIMD_BUILTIN (Va_Vb_Vc,      vxor_insn,     "vxor",       VXOR)
6597  SIMD_BUILTIN (Va_Vb_Vc,    vxoraw_insn,   "vxoraw",     VXORAW)
6598  SIMD_BUILTIN (Va_Vb_Vc,      veqw_insn,     "veqw",       VEQW)
6599  SIMD_BUILTIN (Va_Vb_Vc,      vlew_insn,     "vlew",       VLEW)
6600  SIMD_BUILTIN (Va_Vb_Vc,      vltw_insn,     "vltw",       VLTW)
6601  SIMD_BUILTIN (Va_Vb_Vc,      vnew_insn,     "vnew",       VNEW)
6602  SIMD_BUILTIN (Va_Vb_Vc,    vmr1aw_insn,   "vmr1aw",     VMR1AW)
6603  SIMD_BUILTIN (Va_Vb_Vc,     vmr1w_insn,    "vmr1w",      VMR1W)
6604  SIMD_BUILTIN (Va_Vb_Vc,    vmr2aw_insn,   "vmr2aw",     VMR2AW)
6605  SIMD_BUILTIN (Va_Vb_Vc,     vmr2w_insn,    "vmr2w",      VMR2W)
6606  SIMD_BUILTIN (Va_Vb_Vc,    vmr3aw_insn,   "vmr3aw",     VMR3AW)
6607  SIMD_BUILTIN (Va_Vb_Vc,     vmr3w_insn,    "vmr3w",      VMR3W)
6608  SIMD_BUILTIN (Va_Vb_Vc,    vmr4aw_insn,   "vmr4aw",     VMR4AW)
6609  SIMD_BUILTIN (Va_Vb_Vc,     vmr4w_insn,    "vmr4w",      VMR4W)
6610  SIMD_BUILTIN (Va_Vb_Vc,    vmr5aw_insn,   "vmr5aw",     VMR5AW)
6611  SIMD_BUILTIN (Va_Vb_Vc,     vmr5w_insn,    "vmr5w",      VMR5W)
6612  SIMD_BUILTIN (Va_Vb_Vc,    vmr6aw_insn,   "vmr6aw",     VMR6AW)
6613  SIMD_BUILTIN (Va_Vb_Vc,     vmr6w_insn,    "vmr6w",      VMR6W)
6614  SIMD_BUILTIN (Va_Vb_Vc,    vmr7aw_insn,   "vmr7aw",     VMR7AW)
6615  SIMD_BUILTIN (Va_Vb_Vc,     vmr7w_insn,    "vmr7w",      VMR7W)
6616  SIMD_BUILTIN (Va_Vb_Vc,      vmrb_insn,     "vmrb",       VMRB)
6617  SIMD_BUILTIN (Va_Vb_Vc,    vh264f_insn,   "vh264f",     VH264F)
6618  SIMD_BUILTIN (Va_Vb_Vc,   vh264ft_insn,  "vh264ft",    VH264FT)
6619  SIMD_BUILTIN (Va_Vb_Vc,   vh264fw_insn,  "vh264fw",    VH264FW)
6620  SIMD_BUILTIN (Va_Vb_Vc,     vvc1f_insn,    "vvc1f",      VVC1F)
6621  SIMD_BUILTIN (Va_Vb_Vc,    vvc1ft_insn,   "vvc1ft",     VVC1FT)
6622
6623  SIMD_BUILTIN (Va_Vb_rlimm,    vbaddw_insn,   "vbaddw",     VBADDW)
6624  SIMD_BUILTIN (Va_Vb_rlimm,    vbmaxw_insn,   "vbmaxw",     VBMAXW)
6625  SIMD_BUILTIN (Va_Vb_rlimm,    vbminw_insn,   "vbminw",     VBMINW)
6626  SIMD_BUILTIN (Va_Vb_rlimm,   vbmulaw_insn,  "vbmulaw",    VBMULAW)
6627  SIMD_BUILTIN (Va_Vb_rlimm,   vbmulfw_insn,  "vbmulfw",    VBMULFW)
6628  SIMD_BUILTIN (Va_Vb_rlimm,    vbmulw_insn,   "vbmulw",     VBMULW)
6629  SIMD_BUILTIN (Va_Vb_rlimm,   vbrsubw_insn,  "vbrsubw",    VBRSUBW)
6630  SIMD_BUILTIN (Va_Vb_rlimm,    vbsubw_insn,   "vbsubw",     VBSUBW)
6631
6632  /* Va, Vb, Ic instructions.  */
6633  SIMD_BUILTIN (Va_Vb_Ic,        vasrw_insn,    "vasrw",      VASRW)
6634  SIMD_BUILTIN (Va_Vb_Ic,         vsr8_insn,     "vsr8",       VSR8)
6635  SIMD_BUILTIN (Va_Vb_Ic,       vsr8aw_insn,   "vsr8aw",     VSR8AW)
6636
6637  /* Va, Vb, u6 instructions.  */
6638  SIMD_BUILTIN (Va_Vb_u6,      vasrrwi_insn,  "vasrrwi",    VASRRWi)
6639  SIMD_BUILTIN (Va_Vb_u6,     vasrsrwi_insn, "vasrsrwi",   VASRSRWi)
6640  SIMD_BUILTIN (Va_Vb_u6,       vasrwi_insn,   "vasrwi",     VASRWi)
6641  SIMD_BUILTIN (Va_Vb_u6,     vasrpwbi_insn, "vasrpwbi",   VASRPWBi)
  SIMD_BUILTIN (Va_Vb_u6,    vasrrpwbi_insn, "vasrrpwbi",  VASRRPWBi)
6643  SIMD_BUILTIN (Va_Vb_u6,      vsr8awi_insn,  "vsr8awi",    VSR8AWi)
6644  SIMD_BUILTIN (Va_Vb_u6,        vsr8i_insn,    "vsr8i",      VSR8i)
6645
6646  /* Va, Vb, u8 (simm) instructions.  */
6647  SIMD_BUILTIN (Va_Vb_u8,        vmvaw_insn,    "vmvaw",      VMVAW)
6648  SIMD_BUILTIN (Va_Vb_u8,         vmvw_insn,     "vmvw",       VMVW)
6649  SIMD_BUILTIN (Va_Vb_u8,        vmvzw_insn,    "vmvzw",      VMVZW)
6650  SIMD_BUILTIN (Va_Vb_u8,      vd6tapf_insn,  "vd6tapf",    VD6TAPF)
6651
6652  /* Va, rlimm, u8 (simm) instructions.  */
6653  SIMD_BUILTIN (Va_rlimm_u8,    vmovaw_insn,   "vmovaw",     VMOVAW)
6654  SIMD_BUILTIN (Va_rlimm_u8,     vmovw_insn,    "vmovw",      VMOVW)
6655  SIMD_BUILTIN (Va_rlimm_u8,    vmovzw_insn,   "vmovzw",     VMOVZW)
6656
6657  /* Va, Vb instructions.  */
6658  SIMD_BUILTIN (Va_Vb,          vabsaw_insn,   "vabsaw",     VABSAW)
6659  SIMD_BUILTIN (Va_Vb,           vabsw_insn,    "vabsw",      VABSW)
6660  SIMD_BUILTIN (Va_Vb,         vaddsuw_insn,  "vaddsuw",    VADDSUW)
6661  SIMD_BUILTIN (Va_Vb,          vsignw_insn,   "vsignw",     VSIGNW)
6662  SIMD_BUILTIN (Va_Vb,          vexch1_insn,   "vexch1",     VEXCH1)
6663  SIMD_BUILTIN (Va_Vb,          vexch2_insn,   "vexch2",     VEXCH2)
6664  SIMD_BUILTIN (Va_Vb,          vexch4_insn,   "vexch4",     VEXCH4)
6665  SIMD_BUILTIN (Va_Vb,          vupbaw_insn,   "vupbaw",     VUPBAW)
6666  SIMD_BUILTIN (Va_Vb,           vupbw_insn,    "vupbw",      VUPBW)
6667  SIMD_BUILTIN (Va_Vb,         vupsbaw_insn,  "vupsbaw",    VUPSBAW)
6668  SIMD_BUILTIN (Va_Vb,          vupsbw_insn,   "vupsbw",     VUPSBW)
6669
6670  /* DIb, rlimm, rlimm instructions.  */
6671  SIMD_BUILTIN (Da_rlimm_rlimm,  vdirun_insn,  "vdirun",     VDIRUN)
6672  SIMD_BUILTIN (Da_rlimm_rlimm,  vdorun_insn,  "vdorun",     VDORUN)
6673
  /* DIb, u3, rlimm instructions.  */
6675  SIMD_BUILTIN (Da_u3_rlimm,   vdiwr_insn,    "vdiwr",      VDIWR)
6676  SIMD_BUILTIN (Da_u3_rlimm,    vdowr_insn,    "vdowr",     VDOWR)
6677
6678  /* rlimm instructions.  */
6679  SIMD_BUILTIN (void_rlimm,        vrec_insn,     "vrec",      VREC)
6680  SIMD_BUILTIN (void_rlimm,        vrun_insn,     "vrun",      VRUN)
6681  SIMD_BUILTIN (void_rlimm,     vrecrun_insn,  "vrecrun",   VRECRUN)
6682  SIMD_BUILTIN (void_rlimm,     vendrec_insn,  "vendrec",   VENDREC)
6683
6684  /* Va, [Ib,u8] instructions.  */
6685  SIMD_BUILTIN (Va_Vb_Ic_u8,       vld32wh_insn,  "vld32wh",   VLD32WH)
6686  SIMD_BUILTIN (Va_Vb_Ic_u8,       vld32wl_insn,  "vld32wl",   VLD32WL)
6687  SIMD_BUILTIN (Va_Vb_Ic_u8,         vld64_insn,    "vld64",     VLD64)
6688  SIMD_BUILTIN (Va_Vb_Ic_u8,         vld32_insn,    "vld32",     VLD32)
6689
6690  SIMD_BUILTIN (Va_Ib_u8,           vld64w_insn,   "vld64w",   VLD64W)
6691  SIMD_BUILTIN (Va_Ib_u8,           vld128_insn,   "vld128",   VLD128)
6692  SIMD_BUILTIN (void_Va_Ib_u8,      vst128_insn,   "vst128",   VST128)
6693  SIMD_BUILTIN (void_Va_Ib_u8,       vst64_insn,    "vst64",    VST64)
6694
  /* Va, u3, [Ib, u8] store instructions.  */
6696  SIMD_BUILTIN (void_Va_u3_Ib_u8,  vst16_n_insn,  "vst16_n",   VST16_N)
6697  SIMD_BUILTIN (void_Va_u3_Ib_u8,  vst32_n_insn,  "vst32_n",   VST32_N)
6698
6699  SIMD_BUILTIN (void_u6,  vinti_insn,  "vinti",   VINTI)
6700};
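
/* Illustration only, not part of the implementation: a Va_Vb_Vc entry
   such as vaddw is exposed to the user roughly as

       typedef short v8hi __attribute__ ((vector_size (16)));

       v8hi add_vectors (v8hi a, v8hi b)
       {
	 return __builtin_arc_vaddw (a, b);
       }

   The prototype sketched here is an assumption derived from the
   v8hi_ftype_v8hi_v8hi signature built by arc_init_simd_builtins below;
   arc_expand_simd_builtin maps such a call onto CODE_FOR_vaddw_insn.  */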
6701
6702static void
6703arc_init_simd_builtins (void)
6704{
6705  int i;
6706  tree endlink = void_list_node;
6707  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
6708
6709  tree v8hi_ftype_v8hi_v8hi
6710    = build_function_type (V8HI_type_node,
6711			   tree_cons (NULL_TREE, V8HI_type_node,
6712				      tree_cons (NULL_TREE, V8HI_type_node,
6713						 endlink)));
6714  tree v8hi_ftype_v8hi_int
6715    = build_function_type (V8HI_type_node,
6716			   tree_cons (NULL_TREE, V8HI_type_node,
6717				      tree_cons (NULL_TREE, integer_type_node,
6718						 endlink)));
6719
6720  tree v8hi_ftype_v8hi_int_int
6721    = build_function_type (V8HI_type_node,
6722			   tree_cons (NULL_TREE, V8HI_type_node,
6723				      tree_cons (NULL_TREE, integer_type_node,
6724						 tree_cons (NULL_TREE,
6725							    integer_type_node,
6726							    endlink))));
6727
6728  tree void_ftype_v8hi_int_int
6729    = build_function_type (void_type_node,
6730			   tree_cons (NULL_TREE, V8HI_type_node,
6731				      tree_cons (NULL_TREE, integer_type_node,
6732						 tree_cons (NULL_TREE,
6733							    integer_type_node,
6734							    endlink))));
6735
6736  tree void_ftype_v8hi_int_int_int
6737    = (build_function_type
6738	(void_type_node,
6739	 tree_cons (NULL_TREE, V8HI_type_node,
6740		    tree_cons (NULL_TREE, integer_type_node,
6741			       tree_cons (NULL_TREE, integer_type_node,
6742					  tree_cons (NULL_TREE,
6743						     integer_type_node,
6744						     endlink))))));
6745
6746  tree v8hi_ftype_int_int
6747    = build_function_type (V8HI_type_node,
6748			   tree_cons (NULL_TREE, integer_type_node,
6749				      tree_cons (NULL_TREE, integer_type_node,
6750						 endlink)));
6751
6752  tree void_ftype_int_int
6753    = build_function_type (void_type_node,
6754			   tree_cons (NULL_TREE, integer_type_node,
6755				      tree_cons (NULL_TREE, integer_type_node,
6756						 endlink)));
6757
6758  tree void_ftype_int
6759    = build_function_type (void_type_node,
6760			   tree_cons (NULL_TREE, integer_type_node, endlink));
6761
6762  tree v8hi_ftype_v8hi
6763    = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node,
6764						      endlink));
6765
6766  /* These asserts have been introduced to ensure that the order of builtins
6767     does not get messed up, else the initialization goes wrong.  */
6768  gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc);
  for (i = 0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++)
6770    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6771		  v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list[i].code);
6772
6773  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm);
6774  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++)
6775    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6776		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6777
6778  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic);
6779  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++)
6780    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6781		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6782
6783  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6);
6784  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++)
6785    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6786		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6787
6788  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8);
6789  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++)
6790    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6791		  v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code);
6792
6793  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8);
6794  for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++)
6795    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6796		  v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6797
6798  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb);
6799  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++)
6800    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6801		  v8hi_ftype_v8hi, arc_simd_builtin_desc_list[i].code);
6802
6803  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm);
6804  for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++)
6805    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name,
6806		  void_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6807
6808  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm);
6809  for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++)
6810    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6811		  void_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6812
6813  gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm);
6814  for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++)
6815    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6816		  void_ftype_int, arc_simd_builtin_desc_list[i].code);
6817
6818  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8);
6819  for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++)
6820    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6821		  v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code);
6822
6823  gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8);
6824  for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++)
6825    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6826		  v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code);
6827
6828  gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8);
6829  for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++)
6830    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name,
6831		  void_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code);
6832
6833  gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8);
6834  for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++)
6835    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6836		  void_ftype_v8hi_int_int_int,
6837		  arc_simd_builtin_desc_list[i].code);
6838
6839  gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6);
6840  for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++)
6841    def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name,
6842		  void_ftype_int, arc_simd_builtin_desc_list[i].code);
6843
  gcc_assert (i == ARRAY_SIZE (arc_simd_builtin_desc_list));
6845}
6846
6847/* Helper function of arc_expand_builtin; has the same parameters,
6848   except that EXP is now known to be a call to a simd builtin.  */
6849
6850static rtx
6851arc_expand_simd_builtin (tree exp,
6852			 rtx target,
6853			 rtx subtarget ATTRIBUTE_UNUSED,
6854			 machine_mode mode ATTRIBUTE_UNUSED,
6855			 int ignore ATTRIBUTE_UNUSED)
6856{
6857  tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6858  tree              arg0;
6859  tree              arg1;
6860  tree              arg2;
6861  tree              arg3;
6862  rtx               op0;
6863  rtx               op1;
6864  rtx               op2;
6865  rtx               op3;
6866  rtx               op4;
6867  rtx pat;
6868  unsigned int         i;
6869  int               fcode = DECL_FUNCTION_CODE (fndecl);
6870  int               icode;
6871  machine_mode mode0;
6872  machine_mode mode1;
6873  machine_mode mode2;
6874  machine_mode mode3;
6875  machine_mode mode4;
6876  const struct builtin_description * d;
6877
6878  for (i = 0, d = arc_simd_builtin_desc_list;
6879       i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++)
6880    if (d->code == (const enum arc_builtins) fcode)
6881      break;
6882
6883  /* We must get an entry here.  */
6884  gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list));
6885
6886  switch (d->args_type)
6887    {
6888    case Va_Vb_rlimm:
6889      icode = d->icode;
6890      arg0 = CALL_EXPR_ARG (exp, 0);
6891      arg1 = CALL_EXPR_ARG (exp, 1);
6892      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6893      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6894
6895      target = gen_reg_rtx (V8HImode);
6896      mode0 =  insn_data[icode].operand[1].mode;
6897      mode1 =  insn_data[icode].operand[2].mode;
6898
6899      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6900	op0 = copy_to_mode_reg (mode0, op0);
6901
6902      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
6903	  op1 = copy_to_mode_reg (mode1, op1);
6904
6905      pat = GEN_FCN (icode) (target, op0, op1);
6906      if (! pat)
6907	return 0;
6908
6909      emit_insn (pat);
6910      return target;
6911
6912    case Va_Vb_u6:
6913    case Va_Vb_u8:
6914      icode = d->icode;
6915      arg0 = CALL_EXPR_ARG (exp, 0);
6916      arg1 = CALL_EXPR_ARG (exp, 1);
6917      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6918      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6919
6920      target = gen_reg_rtx (V8HImode);
6921      mode0 =  insn_data[icode].operand[1].mode;
6922      mode1 =  insn_data[icode].operand[2].mode;
6923
6924      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6925	op0 = copy_to_mode_reg (mode0, op0);
6926
6927      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)
6928	  ||  (d->args_type == Va_Vb_u6 && !UNSIGNED_INT6 (INTVAL (op1)))
6929	  ||  (d->args_type == Va_Vb_u8 && !UNSIGNED_INT8 (INTVAL (op1))))
6930	error ("operand 2 of %s instruction should be an unsigned %d-bit value",
6931	       d->name,
6932	       (d->args_type == Va_Vb_u6)? 6: 8);
6933
6934      pat = GEN_FCN (icode) (target, op0, op1);
6935      if (! pat)
6936	return 0;
6937
6938      emit_insn (pat);
6939      return target;
6940
6941    case Va_rlimm_u8:
6942      icode = d->icode;
6943      arg0 = CALL_EXPR_ARG (exp, 0);
6944      arg1 = CALL_EXPR_ARG (exp, 1);
6945      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
6946      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6947
6948      target = gen_reg_rtx (V8HImode);
6949      mode0 =  insn_data[icode].operand[1].mode;
6950      mode1 =  insn_data[icode].operand[2].mode;
6951
6952      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6953	op0 = copy_to_mode_reg (mode0, op0);
6954
6955      if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
6956	   || !(UNSIGNED_INT8 (INTVAL (op1))))
6957	error ("operand 2 of %s instruction should be an unsigned 8-bit value",
6958	       d->name);
6959
6960      pat = GEN_FCN (icode) (target, op0, op1);
6961      if (! pat)
6962	return 0;
6963
6964      emit_insn (pat);
6965      return target;
6966
6967    case Va_Vb_Ic:
6968      icode = d->icode;
6969      arg0 = CALL_EXPR_ARG (exp, 0);
6970      arg1 = CALL_EXPR_ARG (exp, 1);
6971      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6972      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
6973      op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
6974
6975      target = gen_reg_rtx (V8HImode);
6976      mode0 =  insn_data[icode].operand[1].mode;
6977      mode1 =  insn_data[icode].operand[2].mode;
6978
6979      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6980	op0 = copy_to_mode_reg (mode0, op0);
6981
6982      if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
6983	   || !(UNSIGNED_INT3 (INTVAL (op1))))
6984	error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)",
6985	       d->name);
6986
6987      pat = GEN_FCN (icode) (target, op0, op1, op2);
6988      if (! pat)
6989	return 0;
6990
6991      emit_insn (pat);
6992      return target;
6993
6994    case Va_Vb_Vc:
6995      icode = d->icode;
6996      arg0 = CALL_EXPR_ARG (exp, 0);
6997      arg1 = CALL_EXPR_ARG (exp, 1);
6998      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
6999      op1 = expand_expr (arg1, NULL_RTX, V8HImode, EXPAND_NORMAL);
7000
7001      target = gen_reg_rtx (V8HImode);
7002      mode0 =  insn_data[icode].operand[1].mode;
7003      mode1 =  insn_data[icode].operand[2].mode;
7004
7005      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7006	op0 = copy_to_mode_reg (mode0, op0);
7007
7008      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7009	op1 = copy_to_mode_reg (mode1, op1);
7010
7011      pat = GEN_FCN (icode) (target, op0, op1);
7012      if (! pat)
7013	return 0;
7014
7015      emit_insn (pat);
7016      return target;
7017
7018    case Va_Vb:
7019      icode = d->icode;
7020      arg0 = CALL_EXPR_ARG (exp, 0);
7021      op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7022
7023      target = gen_reg_rtx (V8HImode);
7024      mode0 =  insn_data[icode].operand[1].mode;
7025
7026      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7027	op0 = copy_to_mode_reg (mode0, op0);
7028
7029      pat = GEN_FCN (icode) (target, op0);
7030      if (! pat)
7031	return 0;
7032
7033      emit_insn (pat);
7034      return target;
7035
7036    case Da_rlimm_rlimm:
7037      icode = d->icode;
7038      arg0 = CALL_EXPR_ARG (exp, 0);
7039      arg1 = CALL_EXPR_ARG (exp, 1);
7040      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7041      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7042
7043
7044      if (icode == CODE_FOR_vdirun_insn)
7045	target = gen_rtx_REG (SImode, 131);
7046      else if (icode == CODE_FOR_vdorun_insn)
7047	target = gen_rtx_REG (SImode, 139);
7048      else
7049	  gcc_unreachable ();
7050
7051      mode0 =  insn_data[icode].operand[1].mode;
7052      mode1 =  insn_data[icode].operand[2].mode;
7053
7054      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7055	op0 = copy_to_mode_reg (mode0, op0);
7056
7057      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7058	op1 = copy_to_mode_reg (mode1, op1);
7059
7060
7061      pat = GEN_FCN (icode) (target, op0, op1);
7062      if (! pat)
7063	return 0;
7064
7065      emit_insn (pat);
7066      return NULL_RTX;
7067
7068    case Da_u3_rlimm:
7069      icode = d->icode;
7070      arg0 = CALL_EXPR_ARG (exp, 0);
7071      arg1 = CALL_EXPR_ARG (exp, 1);
7072      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7073      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);
7074
7075
      if (GET_CODE (op0) != CONST_INT
	  || !UNSIGNED_INT3 (INTVAL (op0)))
7078	error ("operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7)",
7079	       d->name);
7080
7081      mode1 =  insn_data[icode].operand[1].mode;
7082
7083      if (icode == CODE_FOR_vdiwr_insn)
7084	target = gen_rtx_REG (SImode,
7085			      ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
7086      else if (icode == CODE_FOR_vdowr_insn)
7087	target = gen_rtx_REG (SImode,
7088			      ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
7089      else
7090	gcc_unreachable ();
7091
7092      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7093	op1 = copy_to_mode_reg (mode1, op1);
7094
7095      pat = GEN_FCN (icode) (target, op1);
7096      if (! pat)
7097	return 0;
7098
7099      emit_insn (pat);
7100      return NULL_RTX;
7101
7102    case void_u6:
7103      icode = d->icode;
7104      arg0 = CALL_EXPR_ARG (exp, 0);
7105
7106      fold (arg0);
7107
7108      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7109      mode0 = insn_data[icode].operand[0].mode;
7110
7111      /* op0 should be u6.  */
7112      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)
7113	  || !(UNSIGNED_INT6 (INTVAL (op0))))
7114	error ("operand of %s instruction should be an unsigned 6-bit value",
7115	       d->name);
7116
7117      pat = GEN_FCN (icode) (op0);
7118      if (! pat)
7119	return 0;
7120
7121      emit_insn (pat);
7122      return NULL_RTX;
7123
7124    case void_rlimm:
7125      icode = d->icode;
7126      arg0 = CALL_EXPR_ARG (exp, 0);
7127
7128      fold (arg0);
7129
7130      op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL);
7131      mode0 = insn_data[icode].operand[0].mode;
7132
7133      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
7134	op0 = copy_to_mode_reg (mode0, op0);
7135
7136      pat = GEN_FCN (icode) (op0);
7137      if (! pat)
7138	return 0;
7139
7140      emit_insn (pat);
7141      return NULL_RTX;
7142
7143    case Va_Vb_Ic_u8:
7144      {
7145	rtx src_vreg;
7146	icode = d->icode;
7147	arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
7148	arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7149	arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
7150
7151	src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);
7152	op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);  /* [I]0-7 */
7153	op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);  /* u8 */
7154	op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);	    /* VR0 */
7155
7156	/* target <- src vreg */
7157	emit_insn (gen_move_insn (target, src_vreg));
7158
7159	/* target <- vec_concat: target, mem(Ib, u8) */
7160	mode0 =  insn_data[icode].operand[3].mode;
7161	mode1 =  insn_data[icode].operand[1].mode;
7162
7163	if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0))
7164	     || !(UNSIGNED_INT3 (INTVAL (op0))))
7165	  error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7166		 d->name);
7167
7168	if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
7169	     || !(UNSIGNED_INT8 (INTVAL (op1))))
7170	  error ("operand 2 of %s instruction should be an unsigned 8-bit value",
7171		 d->name);
7172
7173	pat = GEN_FCN (icode) (target, op1, op2, op0);
7174	if (! pat)
7175	  return 0;
7176
7177	emit_insn (pat);
7178	return target;
7179      }
7180
7181    case void_Va_Ib_u8:
7182      icode = d->icode;
7183      arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */
7184      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7185      arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
7186
7187      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);         /* VR0    */
7188      op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);   /* I[0-7] */
7189      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL);   /* u8     */
7190      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); /* Vdest  */
7191
7192      mode0 =  insn_data[icode].operand[0].mode;
7193      mode1 =  insn_data[icode].operand[1].mode;
7194      mode2 =  insn_data[icode].operand[2].mode;
7195      mode3 =  insn_data[icode].operand[3].mode;
7196
7197      if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
7198	   || !(UNSIGNED_INT3 (INTVAL (op1))))
7199	error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7200	       d->name);
7201
7202      if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
7203	   || !(UNSIGNED_INT8 (INTVAL (op2))))
7204	error ("operand 3 of %s instruction should be an unsigned 8-bit value",
7205	       d->name);
7206
7207      if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
7208	op3 = copy_to_mode_reg (mode3, op3);
7209
7210      pat = GEN_FCN (icode) (op0, op1, op2, op3);
7211      if (! pat)
7212	return 0;
7213
7214      emit_insn (pat);
7215      return NULL_RTX;
7216
7217    case Va_Ib_u8:
7218      icode = d->icode;
7219      arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */
7220      arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
7221
7222      op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);       /* VR0    */
7223      op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */
7224      op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* u8     */
7225
7226      /* target <- src vreg */
7227      target = gen_reg_rtx (V8HImode);
7228
7229      /* target <- vec_concat: target, mem(Ib, u8) */
7230      mode0 =  insn_data[icode].operand[1].mode;
7231      mode1 =  insn_data[icode].operand[2].mode;
7232      mode2 =  insn_data[icode].operand[3].mode;
7233
7234      if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
7235	   || !(UNSIGNED_INT3 (INTVAL (op1))))
7236	error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7237	       d->name);
7238
7239      if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
7240	   || !(UNSIGNED_INT8 (INTVAL (op2))))
7241	error ("operand 2 of %s instruction should be an unsigned 8-bit value",
7242	       d->name);
7243
7244      pat = GEN_FCN (icode) (target, op0, op1, op2);
7245      if (! pat)
7246	return 0;
7247
7248      emit_insn (pat);
7249      return target;
7250
7251    case void_Va_u3_Ib_u8:
7252      icode = d->icode;
7253      arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
7254      arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */
7255      arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */
7256      arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */
7257
7258      op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); /* u8        */
7259      op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);       /* VR        */
7260      op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7    */
7261      op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);/* vreg to be stored */
7262      op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL);  /* vreg 0-7 subreg no. */
7263
7264      mode0 =  insn_data[icode].operand[0].mode;
7265      mode2 =  insn_data[icode].operand[2].mode;
7266      mode3 =  insn_data[icode].operand[3].mode;
7267      mode4 =  insn_data[icode].operand[4].mode;
7268
7269      /* Do some correctness checks for the operands.  */
7270      if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
7271	   || !(UNSIGNED_INT8 (INTVAL (op0))))
7272	error ("operand 4 of %s instruction should be an unsigned 8-bit value (0-255)",
7273	       d->name);
7274
7275      if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
7276	   || !(UNSIGNED_INT3 (INTVAL (op2))))
7277	error ("operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7)",
7278	       d->name);
7279
7280      if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
7281	op3 = copy_to_mode_reg (mode3, op3);
7282
7283      if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4))
7284	   || !(UNSIGNED_INT3 (INTVAL (op4))))
7285	error ("operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7)",
7286	       d->name);
7287      else if (icode == CODE_FOR_vst32_n_insn
7288	       && ((INTVAL(op4) % 2 ) != 0))
7289	error ("operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6)",
7290	       d->name);
7291
7292      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
7293      if (! pat)
7294	return 0;
7295
7296      emit_insn (pat);
7297      return NULL_RTX;
7298
7299    default:
7300      gcc_unreachable ();
7301    }
7302  return NULL_RTX;
7303}
7304
7305static bool
7306arc_preserve_reload_p (rtx in)
7307{
7308  return (GET_CODE (in) == PLUS
7309	  && RTX_OK_FOR_BASE_P (XEXP (in, 0), true)
7310	  && CONST_INT_P (XEXP (in, 1))
7311	  && !((INTVAL (XEXP (in, 1)) & 511)));
7312}
7313
7314int
7315arc_register_move_cost (machine_mode,
7316			enum reg_class from_class, enum reg_class to_class)
7317{
7318  /* The ARC600 has no bypass for extension registers, hence a nop might be
7319     needed to be inserted after a write so that reads are safe.  */
7320  if (TARGET_ARC600)
7321    {
7322      if (to_class == MPY_WRITABLE_CORE_REGS)
7323	return 3;
7324     /* Instructions modifying LP_COUNT need 4 additional cycles before
7325	the register will actually contain the value.  */
7326      else if (to_class == LPCOUNT_REG)
7327	return 6;
7328      else if (to_class == WRITABLE_CORE_REGS)
7329	return 6;
7330    }
7331
7332  /* The ARC700 stalls for 3 cycles when *reading* from lp_count.  */
7333  if (TARGET_ARC700
7334      && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
7335	  || from_class == WRITABLE_CORE_REGS))
7336    return 8;
7337
7338  /* Force an attempt to 'mov Dy,Dx' to spill.  */
7339  if (TARGET_ARC700 && TARGET_DPFP
7340      && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS)
7341    return 100;
7342
7343  return 2;
7344}
7345
7346/* Emit code for an addsi3 instruction with OPERANDS.
7347   COND_P indicates if this will use conditional execution.
7348   Return the length of the instruction.
7349   If OUTPUT_P is false, don't actually output the instruction, just return
7350   its length.  */
7351int
7352arc_output_addsi (rtx *operands, bool cond_p, bool output_p)
7353{
7354  char format[32];
7355
7356  int match = operands_match_p (operands[0], operands[1]);
7357  int match2 = operands_match_p (operands[0], operands[2]);
7358  int intval = (REG_P (operands[2]) ? 1
7359		: CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
7360  int neg_intval = -intval;
7361  int short_0 = satisfies_constraint_Rcq (operands[0]);
7362  int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
7363  int ret = 0;
7364
7365#define ADDSI_OUTPUT1(FORMAT) do {\
7366  if (output_p) \
7367    output_asm_insn (FORMAT, operands);\
7368  return ret; \
7369} while (0)
7370#define ADDSI_OUTPUT(LIST) do {\
7371  if (output_p) \
7372    sprintf LIST;\
7373  ADDSI_OUTPUT1 (format);\
7374  return ret; \
7375} while (0)
7376
7377  /* First try to emit a 16 bit insn.  */
7378  ret = 2;
7379  if (!cond_p
7380      /* If we are actually about to output this insn, don't try a 16 bit
7381	 variant if we already decided that we don't want that
	 (i.e. we upsized this insn to align some following insn.)
7383	 E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM -
7384	 but add1 r0,sp,35 doesn't.  */
7385      && (!output_p || (get_attr_length (current_output_insn) & 2)))
7386    {
7387      if (short_p
7388	  && (REG_P (operands[2])
7389	      ? (match || satisfies_constraint_Rcq (operands[2]))
7390	      : (unsigned) intval <= (match ? 127 : 7)))
7391	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7392      if (short_0 && REG_P (operands[1]) && match2)
7393	ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7394      if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
7395	  && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
7396	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7397
7398      if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
7399	  || (REGNO (operands[0]) == STACK_POINTER_REGNUM
7400	      && match && !(neg_intval & ~124)))
7401	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7402    }
7403
7404  /* Now try to emit a 32 bit insn without long immediate.  */
7405  ret = 4;
7406  if (!match && match2 && REG_P (operands[1]))
7407    ADDSI_OUTPUT1 ("add%? %0,%2,%1");
7408  if (match || !cond_p)
7409    {
7410      int limit = (match && !cond_p) ? 0x7ff : 0x3f;
7411      int range_factor = neg_intval & intval;
7412      int shift;
7413
7414      if (intval == -1 << 31)
7415	ADDSI_OUTPUT1 ("bxor%? %0,%1,31");
7416
7417      /* If we can use a straight add / sub instead of a {add,sub}[123] of
	 the same size, do so - the insn latency is lower.  */
7419      /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
7420	 0x800 is not.  */
7421      if ((intval >= 0 && intval <= limit)
7422	       || (intval == -0x800 && limit == 0x7ff))
7423	ADDSI_OUTPUT1 ("add%? %0,%1,%2");
7424      else if ((intval < 0 && neg_intval <= limit)
7425	       || (intval == 0x800 && limit == 0x7ff))
7426	ADDSI_OUTPUT1 ("sub%? %0,%1,%n2");
7427      shift = range_factor >= 8 ? 3 : (range_factor >> 1);
7428      gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3);
7429      gcc_assert ((((1 << shift) - 1) & intval) == 0);
7430      if (((intval < 0 && intval != -0x4000)
7431	   /* sub[123] is slower than add_s / sub, only use it if it
7432	      avoids a long immediate.  */
7433	   && neg_intval <= limit << shift)
7434	  || (intval == 0x4000 && limit == 0x7ff))
7435	ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d",
7436		       shift, neg_intval >> shift));
7437      else if ((intval >= 0 && intval <= limit << shift)
7438	       || (intval == -0x4000 && limit == 0x7ff))
7439	ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift));
7440    }
7441  /* Try to emit a 16 bit opcode with long immediate.  */
7442  ret = 6;
7443  if (short_p && match)
7444    ADDSI_OUTPUT1 ("add%? %0,%1,%S2");
7445
7446  /* We have to use a 32 bit opcode, and with a long immediate.  */
7447  ret = 8;
7448  ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2");
7449}
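
/* A rough illustration of the size classes chosen by arc_output_addsi
   above; the exact operand and range checks are those in the code, not
   in this comment:

     add_s r0,r1,r2                     -> 2 bytes (16-bit encoding)
     add   r0,r0,500                    -> 4 bytes (fits without a limm)
     add2  r0,r0,1025  ; r0 += 4100     -> 4 bytes (scaled constant)
     add   r0,r1,0x12345678             -> 8 bytes (needs a limm)  */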
7450
/* Emit code for a commutative_cond_exec instruction with OPERANDS.
7452   Return the length of the instruction.
7453   If OUTPUT_P is false, don't actually output the instruction, just return
7454   its length.  */
7455int
7456arc_output_commutative_cond_exec (rtx *operands, bool output_p)
7457{
7458  enum rtx_code commutative_op = GET_CODE (operands[3]);
7459  const char *pat = NULL;
7460
7461  /* Canonical rtl should not have a constant in the first operand position.  */
7462  gcc_assert (!CONSTANT_P (operands[1]));
7463
7464  switch (commutative_op)
7465    {
7466      case AND:
7467	if (satisfies_constraint_C1p (operands[2]))
7468	  pat = "bmsk%? %0,%1,%Z2";
7469	else if (satisfies_constraint_Ccp (operands[2]))
7470	  pat = "bclr%? %0,%1,%M2";
7471	else if (satisfies_constraint_CnL (operands[2]))
7472	  pat = "bic%? %0,%1,%n2-1";
7473	break;
7474      case IOR:
7475	if (satisfies_constraint_C0p (operands[2]))
7476	  pat = "bset%? %0,%1,%z2";
7477	break;
7478      case XOR:
7479	if (satisfies_constraint_C0p (operands[2]))
7480	  pat = "bxor%? %0,%1,%z2";
7481	break;
7482      case PLUS:
7483	return arc_output_addsi (operands, true, output_p);
7484      default: break;
7485    }
7486  if (output_p)
7487    output_asm_insn (pat ? pat : "%O3.%d5 %0,%1,%2", operands);
7488  if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2]))
7489    return 4;
7490  return 8;
7491}
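
/* A sketch of what the special cases above buy us (the ".eq" condition
   suffix is just an example predicate):

     and.eq r0,r1,0x7f   ->  bmsk.eq r0,r1,6    (C1p: low-bit mask)
     or.eq  r0,r1,0x20   ->  bset.eq r0,r1,5    (C0p: single set bit)

   i.e. a single bit-manipulation instruction is emitted instead of the
   generic "%O3.%d5 %0,%1,%2" form.  */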
7492
7493/* Helper function of arc_expand_movmem.  ADDR points to a chunk of memory.
   Emit code and return a potentially modified address such that offsets
   up to SIZE can be added to yield a legitimate address.
   If REUSE is set, ADDR is a register that may be modified.  */
7497
7498static rtx
7499force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse)
7500{
7501  rtx base = addr;
7502  rtx offs = const0_rtx;
7503
7504  if (GET_CODE (base) == PLUS)
7505    {
7506      offs = XEXP (base, 1);
7507      base = XEXP (base, 0);
7508    }
7509  if (!REG_P (base)
7510      || (REGNO (base) != STACK_POINTER_REGNUM
	  && REGNO_PTR_FRAME_P (REGNO (base)))
7512      || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
7513      || !SMALL_INT (INTVAL (offs) + size))
7514    {
7515      if (reuse)
7516	emit_insn (gen_add2_insn (addr, offs));
7517      else
7518	addr = copy_to_mode_reg (Pmode, addr);
7519    }
7520  return addr;
7521}
7522
7523/* Like move_by_pieces, but take account of load latency,
7524   and actual offset ranges.
7525   Return true on success.  */
7526
7527bool
7528arc_expand_movmem (rtx *operands)
7529{
7530  rtx dst = operands[0];
7531  rtx src = operands[1];
7532  rtx dst_addr, src_addr;
7533  HOST_WIDE_INT size;
7534  int align = INTVAL (operands[3]);
7535  unsigned n_pieces;
7536  int piece = align;
7537  rtx store[2];
7538  rtx tmpx[2];
7539  int i;
7540
7541  if (!CONST_INT_P (operands[2]))
7542    return false;
7543  size = INTVAL (operands[2]);
7544  /* move_by_pieces_ninsns is static, so we can't use it.  */
7545  if (align >= 4)
7546    n_pieces = (size + 2) / 4U + (size & 1);
7547  else if (align == 2)
7548    n_pieces = (size + 1) / 2U;
7549  else
7550    n_pieces = size;
7551  if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
7552    return false;
7553  if (piece > 4)
7554    piece = 4;
7555  dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
7556  src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
7557  store[0] = store[1] = NULL_RTX;
7558  tmpx[0] = tmpx[1] = NULL_RTX;
7559  for (i = 0; size > 0; i ^= 1, size -= piece)
7560    {
7561      rtx tmp;
7562      machine_mode mode;
7563
7564      if (piece > size)
7565	piece = size & -size;
7566      mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
7567      /* If we don't re-use temporaries, the scheduler gets carried away,
7568	 and the register pressure gets unnecessarily high.  */
7569      if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
7570	tmp = tmpx[i];
7571      else
7572	tmpx[i] = tmp = gen_reg_rtx (mode);
7573      dst_addr = force_offsettable (dst_addr, piece, 1);
7574      src_addr = force_offsettable (src_addr, piece, 1);
7575      if (store[i])
7576	emit_insn (store[i]);
7577      emit_move_insn (tmp, change_address (src, mode, src_addr));
7578      store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
7579      dst_addr = plus_constant (Pmode, dst_addr, piece);
7580      src_addr = plus_constant (Pmode, src_addr, piece);
7581    }
7582  if (store[i])
7583    emit_insn (store[i]);
7584  if (store[i^1])
7585    emit_insn (store[i^1]);
7586  return true;
7587}
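
/* Sketch of what the loop above emits for an 8-byte, word-aligned copy
   (register names are placeholders):

       ld  rA,[src,0]
       ld  rB,[src,4]
       st  rA,[dst,0]
       st  rB,[dst,4]

   Each store is delayed by one iteration and the two temporaries
   alternate, so a load result is never used by the very next insn,
   hiding the load latency.  */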
7588
7589/* Prepare operands for move in MODE.  Return true iff the move has
7590   been emitted.  */
7591
7592bool
7593prepare_move_operands (rtx *operands, machine_mode mode)
7594{
  /* We used to do this only for MODE_INT modes, but addresses to floating
7596     point variables may well be in the small data section.  */
7597  if (1)
7598    {
7599      if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
7600	operands[0] = arc_rewrite_small_data (operands[0]);
7601      else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
7602	{
7603	  emit_pic_move (operands, SImode);
7604
7605	  /* Disable any REG_EQUALs associated with the symref
7606	     otherwise the optimization pass undoes the work done
7607	     here and references the variable directly.  */
7608	}
7609      else if (GET_CODE (operands[0]) != MEM
7610	       && !TARGET_NO_SDATA_SET
7611	       && small_data_pattern (operands[1], Pmode))
7612       {
7613	  /* This is to take care of address calculations involving sdata
7614	     variables.  */
7615	  operands[1] = arc_rewrite_small_data (operands[1]);
7616
	  emit_insn (gen_rtx_SET (mode, operands[0], operands[1]));
7618	  /* ??? This note is useless, since it only restates the set itself.
7619	     We should rather use the original SYMBOL_REF.  However, there is
7620	     the problem that we are lying to the compiler about these
7621	     SYMBOL_REFs to start with.  symbol@sda should be encoded specially
7622	     so that we can tell it apart from an actual symbol.  */
7623	  set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7624
7625	  /* Take care of the REG_EQUAL note that will be attached to mark the
7626	     output reg equal to the initial symbol_ref after this code is
7627	     executed.  */
7628	  emit_move_insn (operands[0], operands[0]);
7629	  return true;
7630	}
7631    }
7632
7633  if (MEM_P (operands[0])
7634      && !(reload_in_progress || reload_completed))
7635    {
7636      operands[1] = force_reg (mode, operands[1]);
7637      if (!move_dest_operand (operands[0], mode))
7638	{
7639	  rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
	  /* This is like change_address_1 (operands[0], mode, 0, 1),
7641	     except that we can't use that function because it is static.  */
7642	  rtx pat = change_address (operands[0], mode, addr);
7643	  MEM_COPY_ATTRIBUTES (pat, operands[0]);
7644	  operands[0] = pat;
7645	}
7646      if (!cse_not_expected)
7647	{
7648	  rtx pat = XEXP (operands[0], 0);
7649
7650	  pat = arc_legitimize_address_0 (pat, pat, mode);
7651	  if (pat)
7652	    {
7653	      pat = change_address (operands[0], mode, pat);
7654	      MEM_COPY_ATTRIBUTES (pat, operands[0]);
7655	      operands[0] = pat;
7656	    }
7657	}
7658    }
7659
7660  if (MEM_P (operands[1]) && !cse_not_expected)
7661    {
7662      rtx pat = XEXP (operands[1], 0);
7663
7664      pat = arc_legitimize_address_0 (pat, pat, mode);
7665      if (pat)
7666	{
7667	  pat = change_address (operands[1], mode, pat);
7668	  MEM_COPY_ATTRIBUTES (pat, operands[1]);
7669	  operands[1] = pat;
7670	}
7671    }
7672
7673  return false;
7674}
7675
7676/* Prepare OPERANDS for an extension using CODE to OMODE.
7677   Return true iff the move has been emitted.  */
7678
7679bool
7680prepare_extend_operands (rtx *operands, enum rtx_code code,
7681			 machine_mode omode)
7682{
7683  if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
7684    {
7685      /* This is to take care of address calculations involving sdata
7686	 variables.  */
7687      operands[1]
7688	= gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
7689      emit_insn (gen_rtx_SET (omode, operands[0], operands[1]));
7690      set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
7691
7692      /* Take care of the REG_EQUAL note that will be attached to mark the
7693	 output reg equal to the initial extension after this code is
7694	 executed.  */
7695      emit_move_insn (operands[0], operands[0]);
7696      return true;
7697    }
7698  return false;
7699}
7700
7701/* Output a library call to a function called FNAME that has been arranged
7702   to be local to any dso.  */
7703
7704const char *
7705arc_output_libcall (const char *fname)
7706{
7707  unsigned len = strlen (fname);
7708  static char buf[64];
7709
7710  gcc_assert (len < sizeof buf - 35);
7711  if (TARGET_LONG_CALLS_SET
7712     || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
7713    {
7714      if (flag_pic)
7715	sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname);
7716      else
7717	sprintf (buf, "jl%%! @%s", fname);
7718    }
7719  else
7720    sprintf (buf, "bl%%!%%* @%s", fname);
7721  return buf;
7722}
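
/* Example outputs of arc_output_libcall, using "__foo" as a placeholder
   for FNAME (the %! and %* sequences are the template's print-operand
   punctuation, expanded at output time):

     default:                 bl%!%* @__foo
     -mlong-calls, non-PIC:   jl%! @__foo
     -mlong-calls, PIC:       add r12,pcl,@__foo-(.&-4)
                              jl%!%* [r12]  */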
7723
7724/* Return the SImode highpart of the DImode value IN.  */
7725
7726rtx
7727disi_highpart (rtx in)
7728{
7729  return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
7730}
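
/* For example, for a DImode value living in a register pair, the SImode
   high part is the word at byte offset 4 on a little-endian target and
   at byte offset 0 on a big-endian target, which is exactly the subreg
   the call above constructs.  */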
7731
7732/* Return length adjustment for INSN.
7733   For ARC600:
7734   A write to a core reg greater or equal to 32 must not be immediately
7735   followed by a use.  Anticipate the length requirement to insert a nop
7736   between PRED and SUCC to prevent a hazard.  */
7737
7738static int
7739arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ)
7740{
7741  if (!TARGET_ARC600)
7742    return 0;
7743  /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
7744     in front of SUCC anyway, so there will be separation between PRED and
7745     SUCC.  */
7746  if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7747      && LABEL_P (prev_nonnote_insn (succ)))
7748    return 0;
7749  if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
7750    return 0;
7751  if (GET_CODE (PATTERN (pred)) == SEQUENCE)
7752    pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
7753  if (GET_CODE (PATTERN (succ)) == SEQUENCE)
7754    succ = as_a <rtx_sequence *> (PATTERN (succ))->insn (0);
7755  if (recog_memoized (pred) == CODE_FOR_mulsi_600
7756      || recog_memoized (pred) == CODE_FOR_umul_600
7757      || recog_memoized (pred) == CODE_FOR_mac_600
7758      || recog_memoized (pred) == CODE_FOR_mul64_600
7759      || recog_memoized (pred) == CODE_FOR_mac64_600
7760      || recog_memoized (pred) == CODE_FOR_umul64_600
7761      || recog_memoized (pred) == CODE_FOR_umac64_600)
7762    return 0;
7763  subrtx_iterator::array_type array;
7764  FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST)
7765    {
7766      const_rtx x = *iter;
7767      switch (GET_CODE (x))
7768	{
7769	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
7770	  break;
7771	default:
7772	  /* This is also fine for PRE/POST_MODIFY, because they
7773	     contain a SET.  */
7774	  continue;
7775	}
7776      rtx dest = XEXP (x, 0);
      /* Check if this sets an extension register.  N.B. we use 61 for the
7778	 condition codes, which is definitely not an extension register.  */
7779      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
	  /* Check if the same register is used in the successor's pattern.  */
7781	  && (refers_to_regno_p
7782	      (REGNO (dest),
7783	       REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U,
7784	       PATTERN (succ), 0)))
7785	return 4;
7786    }
7787  return 0;
7788}
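
/* Hypothetical example of the hazard handled above: if PRED writes an
   extension register, e.g.

       mov  r40,r2        ; r40 is just an example, 32 <= regno < 61
       add  r3,r40,r3     ; SUCC reads r40 immediately

   the 4 bytes returned account for the nop that must separate the write
   from the read on ARC600.  */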
7789
7790/* For ARC600:
7791   A write to a core reg greater or equal to 32 must not be immediately
7792   followed by a use.  Anticipate the length requirement to insert a nop
7793   between PRED and SUCC to prevent a hazard.  */
7794
7795int
7796arc_hazard (rtx_insn *pred, rtx_insn *succ)
7797{
7798  if (!TARGET_ARC600)
7799    return 0;
7800  if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
7801    return 0;
7802  /* We might have a CALL to a non-returning function before a loop end.
7803     ??? Although the manual says that's OK (the target is outside the loop,
7804     and the loop counter unused there), the assembler barfs on this, so we
     must insert a nop before such a call too.  */
7806  if (recog_memoized (succ) == CODE_FOR_doloop_end_i
7807      && (JUMP_P (pred) || CALL_P (pred)
7808	  || GET_CODE (PATTERN (pred)) == SEQUENCE))
7809    return 4;
7810  return arc600_corereg_hazard (pred, succ);
7811}
7812
7813/* Return length adjustment for INSN.  */
7814
7815int
7816arc_adjust_insn_length (rtx_insn *insn, int len, bool)
7817{
7818  if (!INSN_P (insn))
7819    return len;
7820  /* We already handle sequences by ignoring the delay sequence flag.  */
7821  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
7822    return len;
7823
7824  /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
7825     the ZOL mechanism only triggers when advancing to the end address,
7826     so if there's a label at the end of a ZOL, we need to insert a nop.
7827     The ARC600 ZOL also has extra restrictions on jumps at the end of a
7828     loop.  */
7829  if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
7830    {
7831      rtx_insn *prev = prev_nonnote_insn (insn);
7832
7833      return ((LABEL_P (prev)
7834	       || (TARGET_ARC600
7835		   && (JUMP_P (prev)
7836		       || CALL_P (prev) /* Could be a noreturn call.  */
7837		       || (NONJUMP_INSN_P (prev)
7838			   && GET_CODE (PATTERN (prev)) == SEQUENCE))))
7839	      ? len + 4 : len);
7840    }
7841
  /* Check for a return with only one preceding insn since function
     start / call.  */
7844  if (TARGET_PAD_RETURN
7845      && JUMP_P (insn)
7846      && GET_CODE (PATTERN (insn)) != ADDR_VEC
7847      && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7848      && get_attr_type (insn) == TYPE_RETURN)
7849    {
7850      rtx_insn *prev = prev_active_insn (insn);
7851
7852      if (!prev || !(prev = prev_active_insn (prev))
7853	  || ((NONJUMP_INSN_P (prev)
7854	       && GET_CODE (PATTERN (prev)) == SEQUENCE)
7855	      ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7856			   NON_SIBCALL)
7857	      : CALL_ATTR (prev, NON_SIBCALL)))
7858	return len + 4;
7859    }
7860  if (TARGET_ARC600)
7861    {
7862      rtx_insn *succ = next_real_insn (insn);
7863
      /* On the ARC600, a write to an extension register must be separated
7865	 from a read.  */
7866      if (succ && INSN_P (succ))
7867	len += arc600_corereg_hazard (insn, succ);
7868    }
7869
7870  /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
7871     can go awry.  */
7872  extract_constrain_insn_cached (insn);
7873
7874  return len;
7875}
7876
7877/* Values for length_sensitive.  */
7878enum
7879{
  ARC_LS_NONE, // Jcc
7881  ARC_LS_25, // 25 bit offset, B
7882  ARC_LS_21, // 21 bit offset, Bcc
7883  ARC_LS_U13,// 13 bit unsigned offset, LP
7884  ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s
7885  ARC_LS_9,  //  9 bit offset, BRcc
7886  ARC_LS_8,  //  8 bit offset, BRcc_s
7887  ARC_LS_U7, //  7 bit unsigned offset, LPcc
7888  ARC_LS_7   //  7 bit offset, Bcc_s
7889};
7890
7891/* While the infrastructure patch is waiting for review, duplicate the
7892   struct definitions, to allow this file to compile.  */
7893#if 1
7894typedef struct
7895{
7896  unsigned align_set;
7897  /* Cost as a branch / call target or call return address.  */
7898  int target_cost;
7899  int fallthrough_cost;
7900  int branch_cost;
7901  int length;
  /* 0 for not length sensitive, 1 for largest offset range,
     2 for next smaller etc.  */
7904  unsigned length_sensitive : 8;
7905  bool enabled;
7906} insn_length_variant_t;
7907
7908typedef struct insn_length_parameters_s
7909{
7910  int align_unit_log;
7911  int align_base_log;
7912  int max_variants;
7913  int (*get_variants) (rtx_insn *, int, bool, bool, insn_length_variant_t *);
7914} insn_length_parameters_t;
7915
7916static void
7917arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED;
7918#endif
7919
7920static int
7921arc_get_insn_variants (rtx_insn *insn, int len, bool, bool target_p,
7922		       insn_length_variant_t *ilv)
7923{
7924  if (!NONDEBUG_INSN_P (insn))
7925    return 0;
7926  enum attr_type type;
7927  /* shorten_branches doesn't take optimize_size into account yet for the
7928     get_variants mechanism, so turn this off for now.  */
7929  if (optimize_size)
7930    return 0;
7931  if (rtx_sequence *pat = dyn_cast <rtx_sequence *> (PATTERN (insn)))
7932    {
7933      /* The interaction of a short delay slot insn with a short branch is
7934	 too weird for shorten_branches to piece together, so describe the
7935	 entire SEQUENCE.  */
7936      rtx_insn *inner;
7937      if (TARGET_UPSIZE_DBR
7938	  && get_attr_length (pat->insn (1)) <= 2
7939	  && (((type = get_attr_type (inner = pat->insn (0)))
7940	       == TYPE_UNCOND_BRANCH)
7941	      || type == TYPE_BRANCH)
7942	  && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES)
7943	{
7944	  int n_variants
7945	    = arc_get_insn_variants (inner, get_attr_length (inner), true,
7946				     target_p, ilv+1);
7947	  /* The short variant gets split into a higher-cost aligned
7948	     and a lower cost unaligned variant.  */
7949	  gcc_assert (n_variants);
7950	  gcc_assert (ilv[1].length_sensitive == ARC_LS_7
7951		      || ilv[1].length_sensitive == ARC_LS_10);
7952	  gcc_assert (ilv[1].align_set == 3);
7953	  ilv[0] = ilv[1];
7954	  ilv[0].align_set = 1;
7955	  ilv[0].branch_cost += 1;
7956	  ilv[1].align_set = 2;
7957	  n_variants++;
7958	  for (int i = 0; i < n_variants; i++)
7959	    ilv[i].length += 2;
7960	  /* In case an instruction with aligned size is wanted, and
7961	     the short variants are unavailable / too expensive, add
7962	     versions of long branch + long delay slot.  */
7963	  for (int i = 2, end = n_variants; i < end; i++, n_variants++)
7964	    {
7965	      ilv[n_variants] = ilv[i];
7966	      ilv[n_variants].length += 2;
7967	    }
7968	  return n_variants;
7969	}
7970      return 0;
7971    }
7972  insn_length_variant_t *first_ilv = ilv;
7973  type = get_attr_type (insn);
7974  bool delay_filled
7975    = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES);
7976  int branch_align_cost = delay_filled ? 0 : 1;
7977  int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1;
7978  /* If the previous instruction is an sfunc call, this insn is always
7979     a target, even though the middle-end is unaware of this.  */
7980  bool force_target = false;
7981  rtx_insn *prev = prev_active_insn (insn);
7982  if (prev && arc_next_active_insn (prev, 0) == insn
7983      && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
7984	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
7985		       NON_SIBCALL)
7986	  : (CALL_ATTR (prev, NON_SIBCALL)
7987	     && NEXT_INSN (PREV_INSN (prev)) == prev)))
7988    force_target = true;
7989
7990  switch (type)
7991    {
7992    case TYPE_BRCC:
      /* Short BRCC only comes in a no-delay-slot version, and without a limm.  */
7994      if (!delay_filled)
7995	{
7996	  ilv->align_set = 3;
7997	  ilv->length = 2;
7998	  ilv->branch_cost = 1;
7999	  ilv->enabled = (len == 2);
8000	  ilv->length_sensitive = ARC_LS_8;
8001	  ilv++;
8002	}
8003      /* Fall through.  */
8004    case TYPE_BRCC_NO_DELAY_SLOT:
8005      /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for
8006	 (delay slot) scheduling purposes, but they are longer.  */
8007      if (GET_CODE (PATTERN (insn)) == PARALLEL
8008	  && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET)
8009	return 0;
8010      /* Standard BRCC: 4 bytes, or 8 bytes with limm.  */
8011      ilv->length = ((type == TYPE_BRCC) ? 4 : 8);
8012      ilv->align_set = 3;
8013      ilv->branch_cost = branch_align_cost;
8014      ilv->enabled = (len <= ilv->length);
8015      ilv->length_sensitive = ARC_LS_9;
8016      if ((target_p || force_target)
8017	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8018	{
8019	  ilv[1] = *ilv;
8020	  ilv->align_set = 1;
8021	  ilv++;
8022	  ilv->align_set = 2;
8023	  ilv->target_cost = 1;
8024	  ilv->branch_cost = branch_unalign_cost;
8025	}
8026      ilv++;
8027
8028      rtx op, op0;
8029      op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
8030      op0 = XEXP (op, 0);
8031
8032      if (GET_CODE (op0) == ZERO_EXTRACT
8033	  && satisfies_constraint_L (XEXP (op0, 2)))
8034	op0 = XEXP (op0, 0);
8035      if (satisfies_constraint_Rcq (op0))
8036	{
8037	  ilv->length = ((type == TYPE_BRCC) ? 6 : 10);
8038	  ilv->align_set = 3;
8039	  ilv->branch_cost = 1 + branch_align_cost;
8040	  ilv->fallthrough_cost = 1;
8041	  ilv->enabled = true;
8042	  ilv->length_sensitive = ARC_LS_21;
8043	  if (!delay_filled && TARGET_UNALIGN_BRANCH)
8044	    {
8045	      ilv[1] = *ilv;
8046	      ilv->align_set = 1;
8047	      ilv++;
8048	      ilv->align_set = 2;
8049	      ilv->branch_cost = 1 + branch_unalign_cost;
8050	    }
8051	  ilv++;
8052	}
8053      ilv->length = ((type == TYPE_BRCC) ? 8 : 12);
8054      ilv->align_set = 3;
8055      ilv->branch_cost = 1 + branch_align_cost;
8056      ilv->fallthrough_cost = 1;
8057      ilv->enabled = true;
8058      ilv->length_sensitive = ARC_LS_21;
8059      if ((target_p || force_target)
8060	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8061	{
8062	  ilv[1] = *ilv;
8063	  ilv->align_set = 1;
8064	  ilv++;
8065	  ilv->align_set = 2;
8066	  ilv->target_cost = 1;
8067	  ilv->branch_cost = 1 + branch_unalign_cost;
8068	}
8069      ilv++;
8070      break;
8071
8072    case TYPE_SFUNC:
8073      ilv->length = 12;
8074      goto do_call;
8075    case TYPE_CALL_NO_DELAY_SLOT:
8076      ilv->length = 8;
8077      goto do_call;
8078    case TYPE_CALL:
8079      ilv->length = 4;
8080      ilv->length_sensitive
8081	= GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25;
8082    do_call:
8083      ilv->align_set = 3;
8084      ilv->fallthrough_cost = branch_align_cost;
8085      ilv->enabled = true;
8086      if ((target_p || force_target)
8087	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8088	{
8089	  ilv[1] = *ilv;
8090	  ilv->align_set = 1;
8091	  ilv++;
8092	  ilv->align_set = 2;
8093	  ilv->target_cost = 1;
8094	  ilv->fallthrough_cost = branch_unalign_cost;
8095	}
8096      ilv++;
8097      break;
8098    case TYPE_UNCOND_BRANCH:
8099      /* Strictly speaking, this should be ARC_LS_10 for equality comparisons,
8100	 but that makes no difference at the moment.  */
8101      ilv->length_sensitive = ARC_LS_7;
8102      ilv[1].length_sensitive = ARC_LS_25;
8103      goto do_branch;
8104    case TYPE_BRANCH:
8105      ilv->length_sensitive = ARC_LS_10;
8106      ilv[1].length_sensitive = ARC_LS_21;
8107    do_branch:
8108      ilv->align_set = 3;
8109      ilv->length = 2;
8110      ilv->branch_cost = branch_align_cost;
8111      ilv->enabled = (len == ilv->length);
8112      ilv++;
8113      ilv->length = 4;
8114      ilv->align_set = 3;
8115      ilv->branch_cost = branch_align_cost;
8116      ilv->enabled = true;
8117      if ((target_p || force_target)
8118	  || (!delay_filled && TARGET_UNALIGN_BRANCH))
8119	{
8120	  ilv[1] = *ilv;
8121	  ilv->align_set = 1;
8122	  ilv++;
8123	  ilv->align_set = 2;
8124	  ilv->target_cost = 1;
8125	  ilv->branch_cost = branch_unalign_cost;
8126	}
8127      ilv++;
8128      break;
8129    case TYPE_JUMP:
8130      return 0;
8131    default:
8132      /* For every short insn, there is generally also a long insn.
8133	 trap_s is an exception.  */
8134      if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s)
8135	return 0;
8136      ilv->align_set = 3;
8137      ilv->length = len;
8138      ilv->enabled = 1;
8139      ilv++;
8140      ilv->align_set = 3;
8141      ilv->length = len + 2;
8142      ilv->enabled = 1;
8143      if (target_p || force_target)
8144	{
8145	  ilv[1] = *ilv;
8146	  ilv->align_set = 1;
8147	  ilv++;
8148	  ilv->align_set = 2;
8149	  ilv->target_cost = 1;
8150	}
8151      ilv++;
8152    }
8153  /* If the previous instruction is an sfunc call, this insn is always
8154     a target, even though the middle-end is unaware of this.
8155     Therefore, if we have a call predecessor, transfer the target cost
8156     to the fallthrough and branch costs.  */
8157  if (force_target)
8158    {
8159      for (insn_length_variant_t *p = first_ilv; p < ilv; p++)
8160	{
8161	  p->fallthrough_cost += p->target_cost;
8162	  p->branch_cost += p->target_cost;
8163	  p->target_cost = 0;
8164	}
8165    }
8166
8167  return ilv - first_ilv;
8168}
8169
8170static void
8171arc_insn_length_parameters (insn_length_parameters_t *ilp)
8172{
8173  ilp->align_unit_log = 1;
8174  ilp->align_base_log = 1;
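  /* The worst case comes from a conditional branch (TYPE_BRCC) in
     arc_get_insn_variants above: a short no-delay-slot variant plus
     aligned/unaligned splits of the standard, Rcq-with-limm and long
     forms, i.e. up to 1 + 2 + 2 + 2 = 7 variants.  */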
8175  ilp->max_variants = 7;
8176  ilp->get_variants = arc_get_insn_variants;
8177}
8178
8179/* Return a copy of COND from *STATEP, inverted if that is indicated by the
8180   CC field of *STATEP.  */
8181
8182static rtx
8183arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
8184{
8185  rtx cond = statep->cond;
8186  int raw_cc = get_arc_condition_code (cond);
8187  if (reverse)
8188    raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
8189
8190  if (statep->cc == raw_cc)
8191    return copy_rtx (cond);
8192
8193  gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
8194
8195  machine_mode ccm = GET_MODE (XEXP (cond, 0));
8196  enum rtx_code code = reverse_condition (GET_CODE (cond));
8197  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8198    code = reverse_condition_maybe_unordered (GET_CODE (cond));
8199
8200  return gen_rtx_fmt_ee (code, GET_MODE (cond),
8201			 copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
8202}
8203
8204/* Return a version of PAT conditionalized with COND, which is part of INSN.
8205   ANNULLED indicates if INSN is an annulled delay-slot insn.
8206   Register further changes if necessary.  */
8207static rtx
8208conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
8209{
8210  /* For commutative operators, we generally prefer to have
8211     the first source match the destination.  */
8212  if (GET_CODE (pat) == SET)
8213    {
8214      rtx src = SET_SRC (pat);
8215
8216      if (COMMUTATIVE_P (src))
8217	{
8218	  rtx src0 = XEXP (src, 0);
8219	  rtx src1 = XEXP (src, 1);
8220	  rtx dst = SET_DEST (pat);
8221
8222	  if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
8223	      /* Leave add_n alone - the canonical form is to
8224		 have the complex summand first.  */
8225	      && REG_P (src0))
8226	    pat = gen_rtx_SET (VOIDmode, dst,
8227			       gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
8228					       src1, src0));
8229	}
8230    }
8231
8232  /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
8233     what to do with COND_EXEC.  */
8234  if (RTX_FRAME_RELATED_P (insn))
8235    {
8236      /* If this is the delay slot insn of an annulled branch,
8237	 dwarf2out.c:scan_trace understands the annulling semantics
8238	 without the COND_EXEC.  */
8239      gcc_assert (annulled);
8240      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
8241				 REG_NOTES (insn));
8242      validate_change (insn, &REG_NOTES (insn), note, 1);
8243    }
8244  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8245  return pat;
8246}
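
/* For illustration (a sketch only, not taken from the sources): given
   COND (eq (reg cc) (const_int 0)) and the commutative pattern

     (set (reg:SI 0) (plus:SI (reg:SI 1) (reg:SI 0)))

   conditionalize_nonjump first swaps the summands so that the destination
   matches the first source, then wraps the result:

     (cond_exec (eq (reg cc) (const_int 0))
       (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1))))  */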
8247
8248/* Use the ccfsm machinery to do if conversion.  */
8249
8250static unsigned
8251arc_ifcvt (void)
8252{
8253  struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
8254  basic_block merge_bb = 0;
8255
8256  memset (statep, 0, sizeof *statep);
8257  for (rtx_insn *insn = get_insns (); insn; insn = next_insn (insn))
8258    {
8259      arc_ccfsm_advance (insn, statep);
8260
8261      switch (statep->state)
8262	{
8263	case 0:
8264	  if (JUMP_P (insn))
8265	    merge_bb = 0;
8266	  break;
8267	case 1: case 2:
8268	  {
8269	    /* Deleted branch.  */
8270	    gcc_assert (!merge_bb);
8271	    merge_bb = BLOCK_FOR_INSN (insn);
8272	    basic_block succ_bb
8273	      = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn))));
8274	    arc_ccfsm_post_advance (insn, statep);
8275	    gcc_assert (!IN_RANGE (statep->state, 1, 2));
8276	    rtx_insn *seq = NEXT_INSN (PREV_INSN (insn));
8277	    if (seq != insn)
8278	      {
8279		rtx slot = XVECEXP (PATTERN (seq), 0, 1);
8280		rtx pat = PATTERN (slot);
8281		if (INSN_ANNULLED_BRANCH_P (insn))
8282		  {
8283		    rtx cond
8284		      = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot));
8285		    pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
8286		  }
8287		if (!validate_change (seq, &PATTERN (seq), pat, 0))
8288		  gcc_unreachable ();
8289		PUT_CODE (slot, NOTE);
8290		NOTE_KIND (slot) = NOTE_INSN_DELETED;
8291		if (merge_bb && succ_bb)
8292		  merge_blocks (merge_bb, succ_bb);
8293	      }
8294	    else if (merge_bb && succ_bb)
8295	      {
8296		set_insn_deleted (insn);
8297		merge_blocks (merge_bb, succ_bb);
8298	      }
8299	    else
8300	      {
8301		PUT_CODE (insn, NOTE);
8302		NOTE_KIND (insn) = NOTE_INSN_DELETED;
8303	      }
8304	    continue;
8305	  }
8306	case 3:
8307	  if (LABEL_P (insn)
8308	      && statep->target_label == CODE_LABEL_NUMBER (insn))
8309	    {
8310	      arc_ccfsm_post_advance (insn, statep);
8311	      basic_block succ_bb = BLOCK_FOR_INSN (insn);
8312	      if (merge_bb && succ_bb)
8313		merge_blocks (merge_bb, succ_bb);
8314	      else if (--LABEL_NUSES (insn) == 0)
8315		{
8316		  const char *name = LABEL_NAME (insn);
8317		  PUT_CODE (insn, NOTE);
8318		  NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL;
8319		  NOTE_DELETED_LABEL_NAME (insn) = name;
8320		}
8321	      merge_bb = 0;
8322	      continue;
8323	    }
8324	  /* Fall through.  */
8325	case 4: case 5:
8326	  if (!NONDEBUG_INSN_P (insn))
8327	    break;
8328
8329	  /* Conditionalized insn.  */
8330
8331	  rtx_insn *prev, *pprev;
8332	  rtx *patp, pat, cond;
8333	  bool annulled; annulled = false;
8334
8335	  /* If this is a delay slot insn in a non-annulled branch,
8336	     don't conditionalize it.  N.B., this should be fine for
8337	     conditional return too.  However, don't do this for
8338	     unconditional branches, as these would be encountered when
8339	     processing an 'else' part.  */
8340	  prev = PREV_INSN (insn);
8341	  pprev = PREV_INSN (prev);
8342	  if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn)
8343	      && JUMP_P (prev) && get_attr_cond (prev) == COND_USE)
8344	    {
8345	      if (!INSN_ANNULLED_BRANCH_P (prev))
8346		break;
8347	      annulled = true;
8348	    }
8349
8350	  patp = &PATTERN (insn);
8351	  pat = *patp;
8352	  cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn));
8353	  if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8354	    {
8355	      /* ??? don't conditionalize if all side effects are dead
8356		 in the not-execute case.  */
8357
8358	      pat = conditionalize_nonjump (pat, cond, insn, annulled);
8359	    }
8360	  else if (simplejump_p (insn))
8361	    {
8362	      patp = &SET_SRC (pat);
8363	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx);
8364	    }
8365	  else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
8366	    {
8367	      pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx);
8368	      pat = gen_rtx_SET (VOIDmode, pc_rtx, pat);
8369	    }
8370	  else
8371	    gcc_unreachable ();
8372	  validate_change (insn, patp, pat, 1);
8373	  if (!apply_change_group ())
8374	    gcc_unreachable ();
8375	  if (JUMP_P (insn))
8376	    {
8377	      rtx_insn *next = next_nonnote_insn (insn);
8378	      if (GET_CODE (next) == BARRIER)
8379		delete_insn (next);
8380	      if (statep->state == 3)
8381		continue;
8382	    }
8383	  break;
8384	default:
8385	  gcc_unreachable ();
8386	}
8387      arc_ccfsm_post_advance (insn, statep);
8388    }
8389  return 0;
8390}
8391
8392/* Find annulled delay insns and convert them to use the appropriate predicate.
8393   This allows branch shortening to size up these insns properly.  */
8394
8395static unsigned
8396arc_predicate_delay_insns (void)
8397{
8398  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8399    {
8400      rtx pat, jump, dlay, src, cond, *patp;
8401      int reverse;
8402
8403      if (!NONJUMP_INSN_P (insn)
8404	  || GET_CODE (pat = PATTERN (insn)) != SEQUENCE)
8405	continue;
8406      jump = XVECEXP (pat, 0, 0);
8407      dlay = XVECEXP (pat, 0, 1);
8408      if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump))
8409	continue;
8410      /* If the branch insn does the annulling, leave the delay insn alone.  */
8411      if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay))
8412	continue;
8413      /* ??? Could also leave DLAY un-conditionalized if its target is dead
8414	 on the other path.  */
8415      gcc_assert (GET_CODE (PATTERN (jump)) == SET);
8416      gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx);
8417      src = SET_SRC (PATTERN (jump));
8418      gcc_assert (GET_CODE (src) == IF_THEN_ELSE);
8419      cond = XEXP (src, 0);
8420      if (XEXP (src, 2) == pc_rtx)
8421	reverse = 0;
8422      else if (XEXP (src, 1) == pc_rtx)
8423	reverse = 1;
8424      else
8425	gcc_unreachable ();
8426      if (reverse != !INSN_FROM_TARGET_P (dlay))
8427	{
8428	  machine_mode ccm = GET_MODE (XEXP (cond, 0));
8429	  enum rtx_code code = reverse_condition (GET_CODE (cond));
8430	  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
8431	    code = reverse_condition_maybe_unordered (GET_CODE (cond));
8432
8433	  cond = gen_rtx_fmt_ee (code, GET_MODE (cond),
8434				 copy_rtx (XEXP (cond, 0)),
8435				 copy_rtx (XEXP (cond, 1)));
8436	}
8437      else
8438	cond = copy_rtx (cond);
8439      patp = &PATTERN (dlay);
8440      pat = *patp;
8441      pat = conditionalize_nonjump (pat, cond, dlay, true);
8442      validate_change (dlay, patp, pat, 1);
8443      if (!apply_change_group ())
8444	gcc_unreachable ();
8445    }
8446  return 0;
8447}
8448
8449/* For ARC600: If a write to a core reg >=32 appears in a delay slot
8450  (other than of a forward brcc), it creates a hazard when there is a read
8451  of the same register at the branch target.  We can't know what is at the
8452  branch target of calls, and for branches, we don't really know before the
8453  end of delay slot scheduling, either.  Not only can individual instructions
8454  be hoisted out into a delay slot; a basic block can also be emptied this
8455  way, and branch and/or fall-through targets be redirected.  Hence we don't
8456  want such writes in a delay slot.  */
8457
8458/* Return nonzero iff INSN writes to an extension core register.  */
8459
8460int
8461arc_write_ext_corereg (rtx insn)
8462{
8463  subrtx_iterator::array_type array;
8464  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
8465    {
8466      const_rtx x = *iter;
8467      switch (GET_CODE (x))
8468	{
8469	case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
8470	  break;
8471	default:
8472	  /* This is also fine for PRE/POST_MODIFY, because they
8473	     contain a SET.  */
8474	  continue;
8475	}
8476      const_rtx dest = XEXP (x, 0);
8477      if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
8478	return 1;
8479    }
8480  return 0;
8481}
8482
8483/* This is like the hook, but returns NULL when it can't / won't generate
8484   a legitimate address.  */
8485
8486static rtx
8487arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8488			  machine_mode mode)
8489{
8490  rtx addr, inner;
8491
8492  if (flag_pic && SYMBOLIC_CONST (x))
8493    x = arc_legitimize_pic_address (x, 0);
8494  addr = x;
8495  if (GET_CODE (addr) == CONST)
8496    addr = XEXP (addr, 0);
8497  if (GET_CODE (addr) == PLUS
8498      && CONST_INT_P (XEXP (addr, 1))
8499      && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
8500	   && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
8501	  || (REG_P (XEXP (addr, 0))
8502	      && (INTVAL (XEXP (addr, 1)) & 252))))
8503    {
8504      HOST_WIDE_INT offs, upper;
8505      int size = GET_MODE_SIZE (mode);
8506
8507      offs = INTVAL (XEXP (addr, 1));
8508      upper = (offs + 256 * size) & ~511 * size;
8509      inner = plus_constant (Pmode, XEXP (addr, 0), upper);
8510#if 0 /* ??? this produces worse code for EEMBC idctrn01  */
8511      if (GET_CODE (x) == CONST)
8512	inner = gen_rtx_CONST (Pmode, inner);
8513#endif
8514      addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper);
8515      x = addr;
8516    }
8517  else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
8518    x = force_reg (Pmode, x);
8519  if (memory_address_p ((machine_mode) mode, x))
8520     return x;
8521  return NULL_RTX;
8522}
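
/* A worked example of the offset splitting above (illustrative only):
   for an SImode access at (plus reg 4660), GET_MODE_SIZE is 4, so
   upper = (4660 + 1024) & -2048 == 4096 and the address is rewritten as
   (plus (force_reg (plus reg 4096)) 564); the residual offset always
   lands in the half-open interval [-1024, 1024) produced by the rounding.  */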
8523
8524static rtx
8525arc_legitimize_address (rtx orig_x, rtx oldx, machine_mode mode)
8526{
8527  rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode);
8528
8529  if (new_x)
8530    return new_x;
8531  return orig_x;
8532}
8533
8534static rtx
8535arc_delegitimize_address_0 (rtx x)
8536{
8537  rtx u, gp;
8538
8539  if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC)
8540    {
8541      if (XINT (u, 1) == ARC_UNSPEC_GOT)
8542	return XVECEXP (u, 0, 0);
8543    }
8544  else if (GET_CODE (x) == PLUS
8545	   && ((REG_P (gp = XEXP (x, 0))
8546		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8547	       || (GET_CODE (gp) == CONST
8548		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8549		   && XINT (u, 1) == ARC_UNSPEC_GOT
8550		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8551		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8552	   && GET_CODE (XEXP (x, 1)) == CONST
8553	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8554	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8555    return XVECEXP (u, 0, 0);
8556  else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8557	   && ((REG_P (gp = XEXP (XEXP (x, 0), 1))
8558		&& REGNO (gp) == PIC_OFFSET_TABLE_REGNUM)
8559	       || (GET_CODE (gp) == CONST
8560		   && GET_CODE (u = XEXP (gp, 0)) == UNSPEC
8561		   && XINT (u, 1) == ARC_UNSPEC_GOT
8562		   && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF
8563		   && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC")))
8564	   && GET_CODE (XEXP (x, 1)) == CONST
8565	   && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC
8566	   && XINT (u, 1) == ARC_UNSPEC_GOTOFF)
8567    return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0),
8568			 XVECEXP (u, 0, 0));
8569  else if (GET_CODE (x) == PLUS
8570	   && (u = arc_delegitimize_address_0 (XEXP (x, 1))))
8571    return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u);
8572  return NULL_RTX;
8573}
8574
8575static rtx
8576arc_delegitimize_address (rtx x)
8577{
8578  rtx orig_x = x = delegitimize_mem_from_attrs (x);
8579  if (GET_CODE (x) == MEM)
8580    x = XEXP (x, 0);
8581  x = arc_delegitimize_address_0 (x);
8582  if (x)
8583    {
8584      if (MEM_P (orig_x))
8585	x = replace_equiv_address_nv (orig_x, x);
8586      return x;
8587    }
8588  return orig_x;
8589}
8590
8591/* Return a REG rtx for acc1.  N.B. the gcc-internal representation may
8592   differ from the hardware register number in order to allow the generic
8593   code to correctly split the concatenation of acc1 and acc2.  */
8594
8595rtx
8596gen_acc1 (void)
8597{
8598  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
8599}
8600
8601/* Return a REG rtx for acc2.  N.B. the gcc-internal representation may
8602   differ from the hardware register number in order to allow the generic
8603   code to correctly split the concatenation of acc1 and acc2.  */
8604
8605rtx
8606gen_acc2 (void)
8607{
8608  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
8609}
8610
8611/* Return a REG rtx for mlo.  N.B. the gcc-internal representation may
8612   differ from the hardware register number in order to allow the generic
8613   code to correctly split the concatenation of mhi and mlo.  */
8614
8615rtx
8616gen_mlo (void)
8617{
8618  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58);
8619}
8620
8621/* Return a REG rtx for mhi.  N.B. the gcc-internal representation may
8622   differ from the hardware register number in order to allow the generic
8623   code to correctly split the concatenation of mhi and mlo.  */
8624
8625rtx
8626gen_mhi (void)
8627{
8628  return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59);
8629}
8630
8631/* FIXME: a parameter should be added, and code added to final.c,
8632   to reproduce this functionality in shorten_branches.  */
8633#if 0
8634/* Return nonzero iff BRANCH should be unaligned if possible by upsizing
8635   a previous instruction.  */
8636int
8637arc_unalign_branch_p (rtx branch)
8638{
8639  rtx note;
8640
8641  if (!TARGET_UNALIGN_BRANCH)
8642    return 0;
8643  /* Do not do this if we have a filled delay slot.  */
8644  if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
8645      && !NEXT_INSN (branch)->deleted ())
8646    return 0;
8647  note = find_reg_note (branch, REG_BR_PROB, 0);
8648  return (!note
8649	  || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
8650	  || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
8651}
8652#endif
8653
8654/* When estimating sizes during arc_reorg, when optimizing for speed, there
8655   are three reasons why we need to consider branches to be length 6:
8656   - annul-false delay slot insns are implemented using conditional execution,
8657     thus preventing short insn formation where used.
8658   - for ARC600: annul-true delay slot insns are implemented where possible
8659     using conditional execution, preventing short insn formation where used.
8660   - for ARC700: likely or somewhat likely taken branches are made long and
8661     unaligned if possible to avoid branch penalty.  */
8662
8663bool
8664arc_branch_size_unknown_p (void)
8665{
8666  return !optimize_size && arc_reorg_in_progress;
8667}
8668
8669/* We are about to output a return insn.  Add padding if necessary to avoid
8670   a mispredict.  A return could happen immediately after the function
8671   start, but after a call we know that there will be at least a blink
8672   restore.  */
8673
8674void
8675arc_pad_return (void)
8676{
8677  rtx_insn *insn = current_output_insn;
8678  rtx_insn *prev = prev_active_insn (insn);
8679  int want_long;
8680
8681  if (!prev)
8682    {
8683      fputs ("\tnop_s\n", asm_out_file);
8684      cfun->machine->unalign ^= 2;
8685      want_long = 1;
8686    }
8687  /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
8688     because after a call, we'd have to restore blink first.  */
8689  else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
8690    return;
8691  else
8692    {
8693      want_long = (get_attr_length (prev) == 2);
8694      prev = prev_active_insn (prev);
8695    }
8696  if (!prev
8697      || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
8698	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
8699		       NON_SIBCALL)
8700	  : CALL_ATTR (prev, NON_SIBCALL)))
8701    {
8702      if (want_long)
8703	cfun->machine->size_reason
8704	  = "call/return and return/return must be 6 bytes apart to avoid mispredict";
8705      else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
8706	{
8707	  cfun->machine->size_reason
8708	    = "Long unaligned jump avoids non-delay slot penalty";
8709	  want_long = 1;
8710	}
8711      /* Disgorge the delay insn, if there is any and it may be moved.  */
8712      if (final_sequence
8713	  /* ??? Annulled would be OK if we can and do conditionalize
8714	     the delay slot insn accordingly.  */
8715	  && !INSN_ANNULLED_BRANCH_P (insn)
8716	  && (get_attr_cond (insn) != COND_USE
8717	      || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
8718			     XVECEXP (final_sequence, 0, 1))))
8719	{
8720	  prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
8721	  gcc_assert (!prev_real_insn (insn)
8722		      || !arc_hazard (prev_real_insn (insn), prev));
8723	  cfun->machine->force_short_suffix = !want_long;
8724	  rtx save_pred = current_insn_predicate;
8725	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
8726	  cfun->machine->force_short_suffix = -1;
8727	  prev->set_deleted ();
8728	  current_output_insn = insn;
8729	  current_insn_predicate = save_pred;
8730	}
8731      else if (want_long)
8732	fputs ("\tnop\n", asm_out_file);
8733      else
8734	{
8735	  fputs ("\tnop_s\n", asm_out_file);
8736	  cfun->machine->unalign ^= 2;
8737	}
8738    }
8739  return;
8740}
8741
8742/* The usual; we set up our machine_function data.  */
8743
8744static struct machine_function *
8745arc_init_machine_status (void)
8746{
8747  struct machine_function *machine;
8748  machine = ggc_cleared_alloc<machine_function> ();
8749  machine->fn_type = ARC_FUNCTION_UNKNOWN;
8750  machine->force_short_suffix = -1;
8751
8752  return machine;
8753}
8754
8755/* Implements INIT_EXPANDERS.  We just set up to call the above
8756   function.  */
8757
8758void
8759arc_init_expanders (void)
8760{
8761  init_machine_status = arc_init_machine_status;
8762}
8763
8764/* Check if OP is a proper parallel of a millicode call pattern.  OFFSET
8765   indicates a number of elements to ignore - that allows us to have a
8766   sibcall pattern that starts with (return).  LOAD_P is zero for store
8767   multiple (for prologues), and one for load multiples (for epilogues),
8768   and two for load multiples where no final clobber of blink is required.
8769   We also skip the first load / store element since this is supposed to
8770   be checked in the instruction pattern.  */
8771
8772int
8773arc_check_millicode (rtx op, int offset, int load_p)
8774{
8775  int len = XVECLEN (op, 0) - offset;
8776  int i;
8777
8778  if (load_p == 2)
8779    {
8780      if (len < 2 || len > 13)
8781	return 0;
8782      load_p = 1;
8783    }
8784  else
8785    {
8786      rtx elt = XVECEXP (op, 0, --len);
8787
8788      if (GET_CODE (elt) != CLOBBER
8789	  || !REG_P (XEXP (elt, 0))
8790	  || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
8791	  || len < 3 || len > 13)
8792	return 0;
8793    }
8794  for (i = 1; i < len; i++)
8795    {
8796      rtx elt = XVECEXP (op, 0, i + offset);
8797      rtx reg, mem, addr;
8798
8799      if (GET_CODE (elt) != SET)
8800	return 0;
8801      mem = XEXP (elt, load_p);
8802      reg = XEXP (elt, 1-load_p);
8803      if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem))
8804	return 0;
8805      addr = XEXP (mem, 0);
8806      if (GET_CODE (addr) != PLUS
8807	  || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
8808	  || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
8809	return 0;
8810    }
8811  return 1;
8812}
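
/* For illustration (a sketch, with LOAD_P == 0 and OFFSET == 0): a
   store-multiple millicode parallel accepted above has the shape

     (parallel [(set (mem (sp))          (reg 13))
                (set (mem (plus sp 4))   (reg 14))
                ...
                (set (mem (plus sp i*4)) (reg 13+i))
                ...
                (clobber (reg:SI blink))])

   where element 0 is assumed to have already been checked by the insn
   pattern itself, as noted in the comment before arc_check_millicode.  */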
8813
8814/* Accessor functions for cfun->machine->unalign.  */
8815
8816int
8817arc_get_unalign (void)
8818{
8819  return cfun->machine->unalign;
8820}
8821
8822void
8823arc_clear_unalign (void)
8824{
8825  if (cfun)
8826    cfun->machine->unalign = 0;
8827}
8828
8829void
8830arc_toggle_unalign (void)
8831{
8832  cfun->machine->unalign ^= 2;
8833}
8834
8835/* Operands 0..2 are the operands of an addsi which uses a 12 bit
8836   constant in operand 2, but which would require a LIMM because of
8837   operand mismatch.
8838   Operands 3 and 4 are new SET_SRCs for operand 0.  */
8839
8840void
8841split_addsi (rtx *operands)
8842{
8843  int val = INTVAL (operands[2]);
8844
8845  /* Try for two short insns first.  Lengths being equal, we prefer
8846     expansions with shorter register lifetimes.  */
8847  if (val > 127 && val <= 255
8848      && satisfies_constraint_Rcq (operands[0]))
8849    {
8850      operands[3] = operands[2];
8851      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8852    }
8853  else
8854    {
8855      operands[3] = operands[1];
8856      operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
8857    }
8858}
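
/* Illustrative example (not from the sources): for r0 := r1 + 200 with
   r0 a compact (Rcq) register, the first branch above emits r0 := 200
   followed by r0 := r0 + r1, both of which can use short encodings;
   otherwise we fall back to r0 := r1 followed by r0 := r0 + 200.  */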
8859
8860/* Operands 0..2 are the operands of a subsi which uses a 12 bit
8861   constant in operand 1, but which would require a LIMM because of
8862   operand mismatch.
8863   Operands 3 and 4 are new SET_SRCs for operand 0.  */
8864
8865void
8866split_subsi (rtx *operands)
8867{
8868  int val = INTVAL (operands[1]);
8869
8870  /* Try for two short insns first.  Lengths being equal, we prefer
8871     expansions with shorter register lifetimes.  */
8872  if (satisfies_constraint_Rcq (operands[0])
8873      && satisfies_constraint_Rcq (operands[2]))
8874    {
8875      if (val >= -31 && val <= 127)
8876	{
8877	  operands[3] = gen_rtx_NEG (SImode, operands[2]);
8878	  operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
8879	  return;
8880	}
8881      else if (val >= 0 && val < 255)
8882	{
8883	  operands[3] = operands[1];
8884	  operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
8885	  return;
8886	}
8887    }
8888  /* If the destination is not an ARCompact16 register, we might
8889     still have a chance to make a short insn if the source is one;
8890     we need to start with a reg-reg move for this.  */
8891  operands[3] = operands[2];
8892  operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
8893}
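
/* Illustrative example (not from the sources): for r0 := 20 - r1 with
   r0 and r1 compact (Rcq) registers, the first case above emits
   r0 := -r1 followed by r0 := r0 + 20; for a larger constant such as
   200 the second case emits r0 := 200 followed by r0 := r0 - r1.  */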
8894
8895/* Handle DOUBLE_REGS uses.
8896   Operand 0: destination register
8897   Operand 1: source register  */
8898
8899static rtx
8900arc_process_double_reg_moves (rtx *operands)
8901{
8902  rtx dest = operands[0];
8903  rtx src  = operands[1];
8904  rtx val;
8905
8906  enum usesDxState { none, srcDx, destDx, maxDx };
8907  enum usesDxState state = none;
8908
8909  if (refers_to_regno_p (40, 44, src, 0))
8910    state = srcDx;
8911  if (refers_to_regno_p (40, 44, dest, 0))
8912    {
8913      /* Via arc_register_move_cost, we should never see D,D moves.  */
8914      gcc_assert (state == none);
8915      state = destDx;
8916    }
8917
8918  if (state == none)
8919    return NULL_RTX;
8920
8921  start_sequence ();
8922
8923  if (state == srcDx)
8924    {
8925      /* Without the LR insn, we need to split this into a
8926	 sequence of insns which will use the DEXCLx and DADDHxy
8927	 insns to be able to read the Dx register in question.  */
8928      if (TARGET_DPFP_DISABLE_LRSR)
8929	{
8930	  /* gen *movdf_insn_nolrsr */
8931	  rtx set = gen_rtx_SET (VOIDmode, dest, src);
8932	  rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx);
8933	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1)));
8934	}
8935      else
8936	{
8937	  /* When we have 'mov D, r' or 'mov D, D' then get the target
8938	     register pair for use with LR insn.  */
8939	  rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4);
8940	  rtx destLow  = simplify_gen_subreg(SImode, dest, DFmode, 0);
8941
8942	  /* Produce the two LR insns to get the high and low parts.  */
8943	  emit_insn (gen_rtx_SET (VOIDmode,
8944				  destHigh,
8945				  gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src),
8946				  VUNSPEC_LR_HIGH)));
8947	  emit_insn (gen_rtx_SET (VOIDmode,
8948				  destLow,
8949				  gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src),
8950				  VUNSPEC_LR)));
8951	}
8952    }
8953  else if (state == destDx)
8954    {
8955      /* When we have 'mov r, D' or 'mov D, D' and we have access to the
8956	 LR insn get the target register pair.  */
8957      rtx srcHigh = simplify_gen_subreg(SImode, src, DFmode, 4);
8958      rtx srcLow  = simplify_gen_subreg(SImode, src, DFmode, 0);
8959
8960      emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode,
8961					  gen_rtvec (3, dest, srcHigh, srcLow),
8962					  VUNSPEC_DEXCL_NORES));
8963
8964    }
8965  else
8966    gcc_unreachable ();
8967
8968  val = get_insns ();
8969  end_sequence ();
8970  return val;
8971}
8972
8973/* Operands 0..1 are the operands of a 64 bit move instruction.
8974   Split it into two moves with operands 2/3 and 4/5.  */
8975
8976rtx
8977arc_split_move (rtx *operands)
8978{
8979  machine_mode mode = GET_MODE (operands[0]);
8980  int i;
8981  int swap = 0;
8982  rtx xop[4];
8983  rtx val;
8984
8985  if (TARGET_DPFP)
8986  {
8987    val = arc_process_double_reg_moves (operands);
8988    if (val)
8989      return val;
8990  }
8991
8992  for (i = 0; i < 2; i++)
8993    {
8994      if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
8995	{
8996	  rtx addr = XEXP (operands[i], 0);
8997	  rtx r, o;
8998	  enum rtx_code code;
8999
9000	  gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
9001	  switch (GET_CODE (addr))
9002	    {
9003	    case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
9004	    case PRE_INC: o = GEN_INT (8); goto pre_modify;
9005	    case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9006	    pre_modify:
9007	      code = PRE_MODIFY;
9008	      break;
9009	    case POST_DEC: o = GEN_INT (-8); goto post_modify;
9010	    case POST_INC: o = GEN_INT (8); goto post_modify;
9011	    case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
9012	    post_modify:
9013	      code = POST_MODIFY;
9014	      swap = 2;
9015	      break;
9016	    default:
9017	      gcc_unreachable ();
9018	    }
9019	  r = XEXP (addr, 0);
9020	  xop[0+i] = adjust_automodify_address_nv
9021		      (operands[i], SImode,
9022		       gen_rtx_fmt_ee (code, Pmode, r,
9023				       gen_rtx_PLUS (Pmode, r, o)),
9024		       0);
9025	  xop[2+i] = adjust_automodify_address_nv
9026		      (operands[i], SImode, plus_constant (Pmode, r, 4), 4);
9027	}
9028      else
9029	{
9030	  xop[0+i] = operand_subword (operands[i], 0, 0, mode);
9031	  xop[2+i] = operand_subword (operands[i], 1, 0, mode);
9032	}
9033    }
9034  if (reg_overlap_mentioned_p (xop[0], xop[3]))
9035    {
9036      swap = 2;
9037      gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
9038    }
9039  operands[2+swap] = xop[0];
9040  operands[3+swap] = xop[1];
9041  operands[4-swap] = xop[2];
9042  operands[5-swap] = xop[3];
9043
9044  start_sequence ();
9045  emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3]));
9046  emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[5]));
9047  val = get_insns ();
9048  end_sequence ();
9049
9050  return val;
9051}
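
/* Illustrative example of the overlap handling above: when splitting
   (set (reg:DI 1) (reg:DI 0)) -- destination word pair r1/r2, source
   word pair r0/r1 -- the first destination word r1 overlaps the second
   source word r1, so the subword moves are swapped and emitted as
   r2 := r1 followed by r1 := r0, so that no source word is clobbered
   before it has been read.  */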
9052
9053/* Select between the instruction output templates s_tmpl (for short INSNs)
9054   and l_tmpl (for long INSNs).  */
9055
9056const char *
9057arc_short_long (rtx_insn *insn, const char *s_tmpl, const char *l_tmpl)
9058{
9059  int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
9060
9061  extract_constrain_insn_cached (insn);
9062  return is_short ? s_tmpl : l_tmpl;
9063}
9064
9065/* Searches X for any reference to REGNO, returning the rtx of the
9066   reference found if any.  Otherwise, returns NULL_RTX.  */
9067
9068rtx
9069arc_regno_use_in (unsigned int regno, rtx x)
9070{
9071  const char *fmt;
9072  int i, j;
9073  rtx tem;
9074
9075  if (REG_P (x) && refers_to_regno_p (regno, x))
9076    return x;
9077
9078  fmt = GET_RTX_FORMAT (GET_CODE (x));
9079  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9080    {
9081      if (fmt[i] == 'e')
9082	{
9083	  if ((tem = regno_use_in (regno, XEXP (x, i))))
9084	    return tem;
9085	}
9086      else if (fmt[i] == 'E')
9087	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9088	  if ((tem = regno_use_in (regno , XVECEXP (x, i, j))))
9089	    return tem;
9090    }
9091
9092  return NULL_RTX;
9093}
9094
9095/* Return the integer value of the "type" attribute for INSN, or -1 if
9096   INSN can't have attributes.  */
9097
9098int
9099arc_attr_type (rtx_insn *insn)
9100{
9101  if (NONJUMP_INSN_P (insn)
9102      ? (GET_CODE (PATTERN (insn)) == USE
9103	 || GET_CODE (PATTERN (insn)) == CLOBBER)
9104      : JUMP_P (insn)
9105      ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9106	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9107      : !CALL_P (insn))
9108    return -1;
9109  return get_attr_type (insn);
9110}
9111
9112/* Return true if insn sets the condition codes.  */
9113
9114bool
9115arc_sets_cc_p (rtx_insn *insn)
9116{
9117  if (NONJUMP_INSN_P (insn))
9118    if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
9119      insn = seq->insn (seq->len () - 1);
9120  return arc_attr_type (insn) == TYPE_COMPARE;
9121}
9122
9123/* Return true if INSN is an instruction with a delay slot we may want
9124   to fill.  */
9125
9126bool
9127arc_need_delay (rtx_insn *insn)
9128{
9129  rtx_insn *next;
9130
9131  if (!flag_delayed_branch)
9132    return false;
9133  /* The return at the end of a function needs a delay slot.  */
9134  if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
9135      && (!(next = next_active_insn (insn))
9136	  || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
9137	      && arc_attr_type (next) == TYPE_RETURN))
9138      && (!TARGET_PAD_RETURN
9139	  || (prev_active_insn (insn)
9140	      && prev_active_insn (prev_active_insn (insn))
9141	      && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
9142    return true;
9143  if (NONJUMP_INSN_P (insn)
9144      ? (GET_CODE (PATTERN (insn)) == USE
9145	 || GET_CODE (PATTERN (insn)) == CLOBBER
9146	 || GET_CODE (PATTERN (insn)) == SEQUENCE)
9147      : JUMP_P (insn)
9148      ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
9149	 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
9150      : !CALL_P (insn))
9151    return false;
9152  return num_delay_slots (insn) != 0;
9153}
9154
9155/* Return true if the scheduling pass(es) has/have already run,
9156   i.e. where possible, we should try to mitigate high latencies
9157   by different instruction selection.  */
9158
9159bool
9160arc_scheduling_not_expected (void)
9161{
9162  return cfun->machine->arc_reorg_started;
9163}
9164
9165/* Oddly enough, sometimes we get a zero overhead loop that branch
9166   shortening doesn't think is a loop - observed with compile/pr24883.c
9167   -O3 -fomit-frame-pointer -funroll-loops.  Make sure to include the
9168   alignment visible for branch shortening  (we actually align the loop
9169   insn before it, but that is equivalent since the loop insn is 4 bytes
9170   long.)  */
9171
9172int
9173arc_label_align (rtx label)
9174{
9175  int loop_align = LOOP_ALIGN (LABEL);
9176
9177  if (loop_align > align_labels_log)
9178    {
9179      rtx_insn *prev = prev_nonnote_insn (label);
9180
9181      if (prev && NONJUMP_INSN_P (prev)
9182	  && GET_CODE (PATTERN (prev)) == PARALLEL
9183	  && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
9184	return loop_align;
9185    }
9186  /* Code has a minimum p2 alignment of 1, which we must restore after an
9187     ADDR_DIFF_VEC.  */
9188  if (align_labels_log < 1)
9189    {
9190      rtx_insn *next = next_nonnote_nondebug_insn (label);
9191      if (INSN_P (next) && recog_memoized (next) >= 0)
9192	return 1;
9193    }
9194  return align_labels_log;
9195}
9196
9197/* Return true if LABEL is in executable code.  */
9198
9199bool
9200arc_text_label (rtx_insn *label)
9201{
9202  rtx_insn *next;
9203
9204  /* ??? We use deleted labels like they were still there, see
9205     gcc.c-torture/compile/20000326-2.c .  */
9206  gcc_assert (GET_CODE (label) == CODE_LABEL
9207	      || (GET_CODE (label) == NOTE
9208		  && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
9209  next = next_nonnote_insn (label);
9210  if (next)
9211    return (!JUMP_TABLE_DATA_P (next)
9212	    || GET_CODE (PATTERN (next)) != ADDR_VEC);
9213  else if (!PREV_INSN (label))
9214    /* ??? sometimes text labels get inserted very late, see
9215       gcc.dg/torture/stackalign/comp-goto-1.c */
9216    return true;
9217  return false;
9218}
9219
9220/* Return the size of the pretend args for DECL.  */
9221
9222int
9223arc_decl_pretend_args (tree decl)
9224{
9225  /* struct function is in DECL_STRUCT_FUNCTION (decl), but no
9226     pretend_args there...  See PR38391.  */
9227  gcc_assert (decl == current_function_decl);
9228  return crtl->args.pretend_args_size;
9229}
9230
9231/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
9232  when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
9233  -D_PROFILE_USE; delay branch scheduling then follows a crossing jump
9234  to redirect two breqs.  */
9235
9236static bool
9237arc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
9238{
9239  /* ??? get_attr_type is declared to take an rtx.  */
9240  union { const rtx_insn *c; rtx_insn *r; } u;
9241
9242  u.c = follower;
9243  if (CROSSING_JUMP_P (followee))
9244    switch (get_attr_type (u.r))
9245      {
9246      case TYPE_BRCC:
9247      case TYPE_BRCC_NO_DELAY_SLOT:
9248	return false;
9249      default:
9250	return true;
9251      }
9252  return true;
9253}
9254
9255/* Implement EPILOGUE_USES.
9256   Return true if REGNO should be added to the deemed uses of the epilogue.
9257
9258   We use the return address register
9259   arc_return_address_regs[arc_compute_function_type (cfun)].
9260   In addition, we have to make sure all the register restore instructions
9261   are known to be live in interrupt functions.  */
9262
9263bool
9264arc_epilogue_uses (int regno)
9265{
9266  if (reload_completed)
9267    {
9268      if (ARC_INTERRUPT_P (cfun->machine->fn_type))
9269	{
9270	  if (!fixed_regs[regno])
9271	    return true;
9272	  return regno == arc_return_address_regs[cfun->machine->fn_type];
9273	}
9274      else
9275	return regno == RETURN_ADDR_REGNUM;
9276    }
9277  else
9278    return regno == arc_return_address_regs[arc_compute_function_type (cfun)];
9279}
9280
9281#ifndef TARGET_NO_LRA
9282#define TARGET_NO_LRA !TARGET_LRA
9283#endif
9284
9285static bool
9286arc_lra_p (void)
9287{
9288  return !TARGET_NO_LRA;
9289}
9290
9291/* ??? Should we define TARGET_REGISTER_PRIORITY?  We might prefer to use
9292   Rcq registers, because some insns are shorter with them.  OTOH we already
9293   have separate alternatives for this purpose, and other insns don't
9294   mind, so maybe we should rather prefer the other registers?
9295   We need more data, and we can only get that if we allow people to
9296   try all options.  */
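/* Note: ((((r & 7) ^ 4) - 4) & 15) == r holds exactly for r in
   {0..3, 12..15}, i.e. (assuming the usual ARCompact register file) the
   registers that the 16-bit "compact" encodings can address.  */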
9297static int
9298arc_register_priority (int r)
9299{
9300  switch (arc_lra_priority_tag)
9301    {
9302    case ARC_LRA_PRIORITY_NONE:
9303      return 0;
9304    case ARC_LRA_PRIORITY_NONCOMPACT:
9305      return ((((r & 7) ^ 4) - 4) & 15) != r;
9306    case ARC_LRA_PRIORITY_COMPACT:
9307      return ((((r & 7) ^ 4) - 4) & 15) == r;
9308    default:
9309      gcc_unreachable ();
9310    }
9311}
9312
9313static reg_class_t
9314arc_spill_class (reg_class_t /* orig_class */, machine_mode)
9315{
9316  return GENERAL_REGS;
9317}
9318
9319bool
9320arc_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9321			       int itype)
9322{
9323  rtx x = *p;
9324  enum reload_type type = (enum reload_type) itype;
9325
9326  if (GET_CODE (x) == PLUS
9327      && CONST_INT_P (XEXP (x, 1))
9328      && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true)
9329	  || (REG_P (XEXP (x, 0))
9330	      && reg_equiv_constant (REGNO (XEXP (x, 0))))))
9331    {
9332      int scale = GET_MODE_SIZE (mode);
9333      int shift;
9334      rtx index_rtx = XEXP (x, 1);
9335      HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
9336      rtx reg, sum, sum2;
9337
9338      if (scale > 4)
9339	scale = 4;
9340      if ((scale-1) & offset)
9341	scale = 1;
9342      shift = scale >> 1;
9343      offset_base = (offset + (256 << shift)) & (-512 << shift);
9344      /* Sometimes the normal form does not suit DImode.  We
9345	 could avoid that by using smaller ranges, but that
9346	 would give less optimized code when SImode is
9347	 prevalent.  */
9348      if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift))
9349	{
9350	  int regno;
9351
9352	  reg = XEXP (x, 0);
9353	  regno = REGNO (reg);
9354	  sum2 = sum = plus_constant (Pmode, reg, offset_base);
9355
9356	  if (reg_equiv_constant (regno))
9357	    {
9358	      sum2 = plus_constant (Pmode, reg_equiv_constant (regno),
9359				    offset_base);
9360	      if (GET_CODE (sum2) == PLUS)
9361		sum2 = gen_rtx_CONST (Pmode, sum2);
9362	    }
9363	  *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
9364	  push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL,
9365		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum,
9366		       type);
9367	  return true;
9368	}
9369    }
9370  /* We must re-recognize what we created before.  */
9371  else if (GET_CODE (x) == PLUS
9372	   && GET_CODE (XEXP (x, 0)) == PLUS
9373	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9374	   && REG_P  (XEXP (XEXP (x, 0), 0))
9375	   && CONST_INT_P (XEXP (x, 1)))
9376    {
9377      /* Because this address is so complex, we know it must have
9378	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9379	 it is already unshared, and needs no further unsharing.  */
9380      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9381		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9382      return true;
9383    }
9384  return false;
9385}
9386
9387/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
9388
9389static bool
9390arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
9391				    unsigned int align,
9392				    enum by_pieces_operation op,
9393				    bool speed_p)
9394{
9395  /* Let the movmem expander handle small block moves.  */
9396  if (op == MOVE_BY_PIECES)
9397    return false;
9398
9399  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
9400}
9401
9402struct gcc_target targetm = TARGET_INITIALIZER;
9403
9404#include "gt-arc.h"
9405